From f99d4d2af31e480a73e71d26737d6060bd55d5c8 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Thu, 12 Mar 2026 14:16:21 +0100 Subject: [PATCH 01/36] install deepvariant subworkflow --- modules.json | 20 + .../nf-core/deepvariant/callvariants/main.nf | 50 +++ .../nf-core/deepvariant/callvariants/meta.yml | 68 +++ .../callvariants/tests/main.nf.test | 84 ++++ .../callvariants/tests/main.nf.test.snap | 55 +++ .../callvariants/tests/nextflow.config | 11 + .../nf-core/deepvariant/makeexamples/main.nf | 58 +++ .../nf-core/deepvariant/makeexamples/meta.yml | 135 ++++++ .../makeexamples/tests/main.nf.test | 204 +++++++++ .../makeexamples/tests/main.nf.test.snap | 178 ++++++++ .../makeexamples/tests/nextflow.config | 6 + .../deepvariant/postprocessvariants/main.nf | 86 ++++ .../deepvariant/postprocessvariants/meta.yml | 155 +++++++ .../postprocessvariants/tests/main.nf.test | 123 +++++ .../tests/main.nf.test.snap | 196 ++++++++ .../postprocessvariants/tests/nextflow.config | 10 + subworkflows/nf-core/deepvariant/README.md | 8 + subworkflows/nf-core/deepvariant/main.nf | 46 ++ subworkflows/nf-core/deepvariant/meta.yml | 77 ++++ ...nt-workflow-and-process-equality-tester.nf | 22 + .../tests/disable-small-model.conf | 8 + .../deepvariant/tests/equality.nf.test | 63 +++ .../nf-core/deepvariant/tests/main.nf.test | 152 +++++++ .../deepvariant/tests/main.nf.test.snap | 419 ++++++++++++++++++ .../nf-core/deepvariant/tests/nextflow.config | 14 + 25 files changed, 2248 insertions(+) create mode 100644 modules/nf-core/deepvariant/callvariants/main.nf create mode 100644 modules/nf-core/deepvariant/callvariants/meta.yml create mode 100644 modules/nf-core/deepvariant/callvariants/tests/main.nf.test create mode 100644 modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap create mode 100644 modules/nf-core/deepvariant/callvariants/tests/nextflow.config create mode 100644 modules/nf-core/deepvariant/makeexamples/main.nf create mode 100644 
modules/nf-core/deepvariant/makeexamples/meta.yml create mode 100644 modules/nf-core/deepvariant/makeexamples/tests/main.nf.test create mode 100644 modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap create mode 100644 modules/nf-core/deepvariant/makeexamples/tests/nextflow.config create mode 100644 modules/nf-core/deepvariant/postprocessvariants/main.nf create mode 100644 modules/nf-core/deepvariant/postprocessvariants/meta.yml create mode 100644 modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test create mode 100644 modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap create mode 100644 modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config create mode 100644 subworkflows/nf-core/deepvariant/README.md create mode 100644 subworkflows/nf-core/deepvariant/main.nf create mode 100644 subworkflows/nf-core/deepvariant/meta.yml create mode 100644 subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf create mode 100644 subworkflows/nf-core/deepvariant/tests/disable-small-model.conf create mode 100644 subworkflows/nf-core/deepvariant/tests/equality.nf.test create mode 100644 subworkflows/nf-core/deepvariant/tests/main.nf.test create mode 100644 subworkflows/nf-core/deepvariant/tests/main.nf.test.snap create mode 100644 subworkflows/nf-core/deepvariant/tests/nextflow.config diff --git a/modules.json b/modules.json index 100cd8a1..084a0f43 100644 --- a/modules.json +++ b/modules.json @@ -27,6 +27,21 @@ "installed_by": ["modules", "vcf_gather_bcftools"], "patch": "modules/nf-core/bcftools/sort/bcftools-sort.diff" }, + "deepvariant/callvariants": { + "branch": "master", + "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", + "installed_by": ["deepvariant", "modules"] + }, + "deepvariant/makeexamples": { + "branch": "master", + "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", + "installed_by": ["deepvariant", "modules"] + }, + "deepvariant/postprocessvariants": { + 
"branch": "master", + "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", + "installed_by": ["deepvariant", "modules"] + }, "ensemblvep/download": { "branch": "master", "git_sha": "90cdd21fd96ccbdb3bc90797ca69570d18391055", @@ -141,6 +156,11 @@ "git_sha": "7ac6cbe7c17c2dad685da7f70496c8f48ea48687", "installed_by": ["subworkflows"] }, + "deepvariant": { + "branch": "master", + "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", + "installed_by": ["subworkflows"] + }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", diff --git a/modules/nf-core/deepvariant/callvariants/main.nf b/modules/nf-core/deepvariant/callvariants/main.nf new file mode 100644 index 00000000..2fc656ee --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/main.nf @@ -0,0 +1,50 @@ + +process DEEPVARIANT_CALLVARIANTS { + tag "$meta.id" + label 'process_high' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(make_examples_tfrecords) + + output: + tuple val(meta), path("${prefix}.call-*-of-*.tfrecord.gz"), emit: call_variants_tfrecords + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def matcher = make_examples_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + make_examples_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def examples_tfrecord_name = matcher[0][1] + def shardCount = matcher[0][2] + // Reconstruct the logical name - ${tfrecord_name}.examples.tfrecord@${task.cpus}.gz + def examples_tfrecords_logical_name = "${examples_tfrecord_name}@${shardCount}.gz" + + """ + /opt/deepvariant/bin/call_variants \\ + ${args} \\ + --outfile "${prefix}.call.tfrecord.gz" \\ + --examples "${examples_tfrecords_logical_name}" + + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.call-00000-of-00001.tfrecord.gz + + """ +} diff --git a/modules/nf-core/deepvariant/callvariants/meta.yml b/modules/nf-core/deepvariant/callvariants/meta.yml new file mode 100644 index 00000000..fa1aaa42 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/meta.yml @@ -0,0 +1,68 @@ +name: deepvariant_callvariants +description: Call variants from the examples produced by make_examples +keywords: + - variant calling + - machine learning + - neural network +tools: + - deepvariant: + description: DeepVariant is an analysis pipeline that uses a deep neural network + to call genetic variants from next-generation DNA sequencing data + homepage: https://github.com/google/deepvariant + documentation: https://github.com/google/deepvariant + tool_dev_url: https://github.com/google/deepvariant + doi: "10.1038/nbt.4235" + licence: ["BSD-3-clause"] + identifier: biotools:deepvariant +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - make_examples_tfrecords: + type: file + description: The actual sharded input files, from DEEPVARIANT_MAKEEXAMPLES process + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format +output: + call_variants_tfrecords: + - - meta: + type: list + description: | + Each output contains: unique ID string from input channel, meta, tfrecord file with variant calls. + - ${prefix}.call-*-of-*.tfrecord.gz: + type: list + description: | + Each output contains: unique ID string from input channel, meta, tfrecord file with variant calls. + versions_deepvariant: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/deepvariant/callvariants/tests/main.nf.test b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test new file mode 100644 index 00000000..d617650b --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test @@ -0,0 +1,84 @@ +nextflow_process { + + name "Test Process DEEPVARIANT_CALLVARIANTS" + script "../main.nf" + config "./nextflow.config" + process "DEEPVARIANT_CALLVARIANTS" + + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant" + tag "modules" + tag "modules_nfcore" + + test("homo_sapiens - wgs") { + setup { + 
run("DEEPVARIANT_MAKEEXAMPLES") { + script "../../makeexamples/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + } + when { + process { + """ + input[0] = DEEPVARIANT_MAKEEXAMPLES.out.examples + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.call_variants_tfrecords.get(0).get(0) == [ id:'test', single_end:false ] }, + // The tfrecord binary representation is not stable, but we check the name of the output. 
+ { assert snapshot(file(process.out.call_variants_tfrecords.get(0).get(1)).name).match("homo_sapiens-wgs-call_variants_tfrecords-filenames")}, + ) + } + } + + test("homo_sapiens - wgs - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta + [] // No input paths are needed in stub mode + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + +} diff --git a/modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap new file mode 100644 index 00000000..ce71dac2 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap @@ -0,0 +1,55 @@ +{ + "homo_sapiens-wgs-call_variants_tfrecords-filenames": { + "content": [ + "test.call-00000-of-00001.tfrecord.gz" + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.4" + }, + "timestamp": "2024-09-04T17:04:33.276938" + }, + "homo_sapiens - wgs - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.call-00000-of-00001.tfrecord.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + "DEEPVARIANT_CALLVARIANTS", + "deepvariant", + "1.9.0" + ] + ], + "call_variants_tfrecords": [ + [ + { + "id": "test", + "single_end": false + }, + "test.call-00000-of-00001.tfrecord.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_deepvariant": [ + [ + "DEEPVARIANT_CALLVARIANTS", + "deepvariant", + "1.9.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:02:54.403068431" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepvariant/callvariants/tests/nextflow.config b/modules/nf-core/deepvariant/callvariants/tests/nextflow.config new file mode 100644 index 00000000..68aec144 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/tests/nextflow.config @@ -0,0 +1,11 @@ 
+process { + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + cpus = 2 // Keep CPUs fixed so the number of output files is reproducible + } +} +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } +} diff --git a/modules/nf-core/deepvariant/makeexamples/main.nf b/modules/nf-core/deepvariant/makeexamples/main.nf new file mode 100644 index 00000000..77d2f331 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/main.nf @@ -0,0 +1,58 @@ +process DEEPVARIANT_MAKEEXAMPLES { + tag "$meta.id" + label 'process_high' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(input), path(index), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + tuple val(meta5), path(par_bed) + + output: + tuple val(meta), path("${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}"), emit: examples + tuple val(meta), path("${prefix}.gvcf.tfrecord-*-of-*.gz"), emit: gvcf + tuple val(meta), path("${prefix}_call_variant_outputs.examples.tfrecord-*-of-*.gz", arity: "0..*"), emit: small_model_calls + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def regions = intervals ? "--regions ${intervals}" : "" + def par_regions = par_bed ? "--par_regions_bed=${par_bed}" : "" + + """ + seq 0 ${task.cpus - 1} | parallel -q --halt 2 --line-buffer /opt/deepvariant/bin/make_examples \\ + --mode calling \\ + --ref "${fasta}" \\ + --reads "${input}" \\ + --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ + --gvcf "./${prefix}.gvcf.tfrecord@${task.cpus}.gz" \\ + ${regions} \\ + ${par_regions} \\ + ${args} \\ + --task {} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + printf -v SHARD_COUNT "%04d" ${task.cpus} + for i in \$( seq -f "%04g" 0 ${task.cpus-1} ) + do + echo "" | gzip > ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.tfrecord.gz + touch ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.tfrecord.gz.example_info.json + echo "" | gzip > ${prefix}.gvcf.tfrecord-\$i-of-\$SHARD_COUNT.tfrecord.gz + done + """ +} diff --git a/modules/nf-core/deepvariant/makeexamples/meta.yml b/modules/nf-core/deepvariant/makeexamples/meta.yml new file mode 100644 index 00000000..12056fbd --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/meta.yml @@ -0,0 +1,135 @@ +name: deepvariant_makeexamples +description: Transforms the input alignments to a format suitable for the deep neural + network variant caller +keywords: + - variant calling + - machine learning + - neural network +tools: + - deepvariant: + description: DeepVariant is an analysis pipeline that uses a deep neural network + to call genetic variants from next-generation DNA sequencing data + homepage: https://github.com/google/deepvariant + documentation: https://github.com/google/deepvariant + tool_dev_url: https://github.com/google/deepvariant + doi: "10.1038/nbt.4235" + licence: ["BSD-3-clause"] + identifier: biotools:deepvariant +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - input: + type: file + description: BAM/CRAM file + pattern: "*.bam/cram" + ontologies: [] + - index: + type: file + description: Index of BAM/CRAM file + pattern: "*.bai/crai" + ontologies: [] + - intervals: + type: file + description: Interval file for targeted regions + pattern: "*.bed" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - gzi: + type: file + description: GZI index of reference fasta file + pattern: "*.gzi" + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing reference information + e.g.
[ id:'genome' ] + - par_bed: + type: file + description: BED file containing PAR regions + pattern: "*.bed" + ontologies: [] +output: + examples: + - - meta: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + - ${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + gvcf: + - - meta: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + - ${prefix}.gvcf.tfrecord-*-of-*.gz: + type: list + description: | + Tuple containing sample metadata and the GVCF data in tfrecord format + small_model_calls: + - - meta: + type: list + description: | + Tuple containing sample metadata and examples that can be used for calling + - ${prefix}_call_variant_outputs.examples.tfrecord-*-of-*.gz: + type: list + description: | + Optional variant calls from the small model, if enabled, in tfrecord format + versions_deepvariant: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test new file mode 100644 index 00000000..cc06f780 --- 
/dev/null +++ b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test @@ -0,0 +1,204 @@ +nextflow_process { + + name "Test Process DEEPVARIANT_MAKEEXAMPLES" + script "../main.nf" + config "./nextflow.config" + process "DEEPVARIANT_MAKEEXAMPLES" + + tag "deepvariant/makeexamples" + tag "deepvariant" + tag "modules" + tag "modules_nfcore" + + test("homo_sapiens - [bam, bai] - fasta - fai") { + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + + { assert process.out.examples[0][0] == [id:'test', single_end:false] }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test1-examples-filenames") }, + { assert process.out.gvcf[0][1].size() == 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test1-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test1-gvcf-filenames") } + ) + } + } + + test("homo_sapiens - [cram, crai, genome_bed] - fasta - fai") { + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.examples[0][0] == [id:'test', single_end:false] }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test2-examples-filenames") }, + { assert process.out.gvcf[0][1].size() == 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test2-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test2-gvcf-filenames") } + ) + } + } + + test("homo_sapiens - [bam, bai] - fasta_gz - fasta_gz_fai") { + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/genome/genome.fasta.gz.fai', checkIfExists: true) + ] + input[3] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.gz.gzi', checkIfExists: true) + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.examples[0][0] == [id:'test', single_end:false] }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test3-examples-filenames") }, + { assert process.out.gvcf[0][1].size() == 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test3-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test3-gvcf-filenames") } + ) + } + } + + test("stub") { + + options "-stub" + + when { + + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert process.out.gvcf[0][0] == [id:'test', single_end:false] }, + { assert process.out.examples[0][1].size() == 4 }, + { assert snapshot( + process.out.examples[0][1].collect { file(it).name } + ).match("test4-examples-filenames") }, + { assert process.out.gvcf[0][1].size() 
== 2 }, + { assert snapshot(process.out.versions_deepvariant).match("test4-versions") }, + { assert snapshot( + process.out.gvcf[0][1].collect { file(it).name } + ).match("test4-gvcf-filenames") } + ) + } + } +} diff --git a/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap new file mode 100644 index 00000000..729f0dc5 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap @@ -0,0 +1,178 @@ +{ + "test1-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-00000-of-00002.gz", + "test.gvcf.tfrecord-00001-of-00002.gz" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:12:57.93412258" + }, + "test3-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:09:55.034298895" + }, + "test2-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-00000-of-00002.gz", + "test.examples.tfrecord-00000-of-00002.gz.example_info.json", + "test.examples.tfrecord-00001-of-00002.gz", + "test.examples.tfrecord-00001-of-00002.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:14:02.467533548" + }, + "test1-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-00000-of-00002.gz", + "test.examples.tfrecord-00000-of-00002.gz.example_info.json", + "test.examples.tfrecord-00001-of-00002.gz", + "test.examples.tfrecord-00001-of-00002.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:12:57.790379812" + }, + "test2-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": 
"2026-02-02T17:07:52.050411549" + }, + "test4-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:12:07.012233232" + }, + "test4-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-0000-of-0002.tfrecord.gz", + "test.examples.tfrecord-0000-of-0002.tfrecord.gz.example_info.json", + "test.examples.tfrecord-0001-of-0002.tfrecord.gz", + "test.examples.tfrecord-0001-of-0002.tfrecord.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:58.286077155" + }, + "test1-versions": { + "content": [ + [ + [ + "DEEPVARIANT_MAKEEXAMPLES", + "deepvariant", + "1.9.0" + ] + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:05:28.75651648" + }, + "test3-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-00000-of-00002.gz", + "test.gvcf.tfrecord-00001-of-00002.gz" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:03.780115231" + }, + "test3-examples-filenames": { + "content": [ + [ + "test.examples.tfrecord-00000-of-00002.gz", + "test.examples.tfrecord-00000-of-00002.gz.example_info.json", + "test.examples.tfrecord-00001-of-00002.gz", + "test.examples.tfrecord-00001-of-00002.gz.example_info.json" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:03.702565392" + }, + "test2-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-00000-of-00002.gz", + "test.gvcf.tfrecord-00001-of-00002.gz" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:14:02.550236324" + }, + "test4-gvcf-filenames": { + "content": [ + [ + "test.gvcf.tfrecord-0000-of-0002.tfrecord.gz", + "test.gvcf.tfrecord-0001-of-0002.tfrecord.gz" + ] + ], + "meta": { + "nf-test": 
"0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-01T02:15:58.412547051" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepvariant/makeexamples/tests/nextflow.config b/modules/nf-core/deepvariant/makeexamples/tests/nextflow.config new file mode 100644 index 00000000..6811fe48 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + cpus = 2 // The number of output files is determined by cpus - keep it the same for tests + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } +} diff --git a/modules/nf-core/deepvariant/postprocessvariants/main.nf b/modules/nf-core/deepvariant/postprocessvariants/main.nf new file mode 100644 index 00000000..0830f9ac --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/main.nf @@ -0,0 +1,86 @@ +process DEEPVARIANT_POSTPROCESSVARIANTS { + tag "$meta.id" + label 'process_medium' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" + + input: + tuple val(meta), path(variant_calls_tfrecord_files), path(gvcf_tfrecords), path(small_model_calls), path(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.{tbi,csi}") , emit: vcf_index + tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf + tuple val(meta), path("${prefix}.g.vcf.gz.{tbi,csi}") , emit: gvcf_index + tuple val("${task.process}"), val('deepvariant'), eval("/opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //'"), topic: versions, emit: versions_deepvariant + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running 
this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def regions = intervals ? "--regions ${intervals}" : "" + def variant_calls_tfrecord_name = variant_calls_tfrecord_files[0].name.replaceFirst(/-\d{5}-of-\d{5}/, "") + + def gvcf_matcher = gvcf_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!gvcf_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + gvcf_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def gvcf_tfrecord_name = gvcf_matcher[0][1] + def gvcf_shardCount = gvcf_matcher[0][2] + // Reconstruct the sharded logical name from the first gvcf shard. Example: test.gvcf.tfrecord@00002.gz + def gvcf_tfrecords_logical_name = "${gvcf_tfrecord_name}@${gvcf_shardCount}.gz" + + // The following block determines whether the small model was used, and if so, adds the variant calls from it + // to the argument --small_model_cvo_records. + def small_model_arg = "" + if (small_model_calls) { + def small_model_matcher = (small_model_calls[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/) + if (!small_model_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + small_model_calls[0].baseName + "' doesn't match the expected pattern") + } + def small_model_tfrecord_name = small_model_matcher[0][1] + def small_model_shardCount = small_model_matcher[0][2] + // Reconstruct the logical name.
Example: test_call_variant_outputs.examples.tfrecord@12.gz + def small_model_tfrecords_logical_name = "${small_model_tfrecord_name}@${small_model_shardCount}.gz" + small_model_arg = "--small_model_cvo_records ${small_model_tfrecords_logical_name}" + } + + """ + /opt/deepvariant/bin/postprocess_variants \\ + ${args} \\ + --ref "${fasta}" \\ + --infile "${variant_calls_tfrecord_name}" \\ + --outfile "${prefix}.vcf.gz" \\ + --nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\ + --gvcf_outfile "${prefix}.g.vcf.gz" \\ + ${regions} \\ + ${small_model_arg} \\ + --cpus $task.cpus + + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPVARIANT module does not support Conda. Please use Docker / Singularity / Podman instead." + } + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + """ +} diff --git a/modules/nf-core/deepvariant/postprocessvariants/meta.yml b/modules/nf-core/deepvariant/postprocessvariants/meta.yml new file mode 100644 index 00000000..4a087011 --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/meta.yml @@ -0,0 +1,155 @@ +name: deepvariant_postprocessvariants +description: DeepVariant is an analysis pipeline that uses a deep neural network to + call genetic variants from next-generation DNA sequencing data +keywords: + - variant calling + - machine learning + - neural network +tools: + - deepvariant: + description: DeepVariant is an analysis pipeline that uses a deep neural network + to call genetic variants from next-generation DNA sequencing data + homepage: https://github.com/google/deepvariant + documentation: https://github.com/google/deepvariant + tool_dev_url: https://github.com/google/deepvariant + doi: "10.1038/nbt.4235" + licence: ["BSD-3-clause"] + identifier: 
biotools:deepvariant +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - variant_calls_tfrecord_files: + type: file + description: | + One or more data files containing variant calls from DEEPVARIANT_CALLVARIANTS + pattern: "*.tfrecord.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - gvcf_tfrecords: + type: file + description: | + Sharded tfrecord file from DEEPVARIANT_MAKEEXAMPLES with the coverage information used for GVCF output + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - small_model_calls: + type: file + description: | + Sharded tfrecord file from DEEPVARIANT_MAKEEXAMPLES with variant calls from the small model + pattern: "*.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + - intervals: + type: file + description: Interval file for targeted regions + pattern: "*.bed" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fai" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - gzi: + type: file + description: GZI index of reference fasta file + pattern: "*.gzi" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}.vcf.gz: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + vcf_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.vcf.gz.{tbi,csi}: + type: file + description: Index for VCF + pattern: "*.vcf.gz.{tbi,csi}" + ontologies: [] + gvcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.g.vcf.gz: + type: file + description: Compressed GVCF file + pattern: "*.g.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + gvcf_index: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - ${prefix}.g.vcf.gz.{tbi,csi}: + type: file + description: Index for GVCF + pattern: "*.g.vcf.gz.{tbi,csi}" + ontologies: [] + versions_deepvariant: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool + +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - deepvariant: + type: string + description: The tool name + - /opt/deepvariant/bin/run_deepvariant --version | sed 's/^.*version //': + type: eval + description: The expression to obtain the version of the tool +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test new file mode 100644 index 00000000..ef9110b0 ---
/dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test @@ -0,0 +1,123 @@ +nextflow_process { + + name "Test Process DEEPVARIANT_POSTPROCESSVARIANTS" + script "../main.nf" + process "DEEPVARIANT_POSTPROCESSVARIANTS" + config "./nextflow.config" + + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant/postprocessvariants" + tag "deepvariant" + tag "modules" + tag "modules_nfcore" + + test("homo_sapiens - wgs") { + setup { + run("DEEPVARIANT_MAKEEXAMPLES") { + script "../../makeexamples/main.nf" + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [], + + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + run("DEEPVARIANT_CALLVARIANTS") { + script "../../callvariants/main.nf" + process { + """ + input[0] = DEEPVARIANT_MAKEEXAMPLES.out.examples + """ + } + } + } + when { + process { + """ + input[0] = DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.join( + DEEPVARIANT_MAKEEXAMPLES.out.gvcf, + failOnMismatch: true + ).join( + DEEPVARIANT_MAKEEXAMPLES.out.small_model_calls, + failOnMismatch: true + ).map { meta, tf, gvcf, small_model_calls -> [ meta, tf, gvcf, small_model_calls, [] ] } + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + 
'/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("homo_sapiens - wgs - stub") { + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], + [], + [], + [], + [], + ] + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("stub") } + ) + } + } + +} diff --git a/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap new file mode 100644 index 00000000..a981cf84 --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap @@ -0,0 +1,196 @@ +{ + "stub": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "4": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.g.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_deepvariant": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:22:12.888323156" + }, + "homo_sapiens - wgs": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "4": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ], + "versions_deepvariant": [ + [ + "DEEPVARIANT_POSTPROCESSVARIANTS", + "deepvariant", + "1.9.0" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-02-02T17:19:32.037352523" + } +} \ No newline at end of file diff --git 
a/modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config b/modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config new file mode 100644 index 00000000..b8f3f47a --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config @@ -0,0 +1,10 @@ +process { + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + } +} +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } +} diff --git a/subworkflows/nf-core/deepvariant/README.md b/subworkflows/nf-core/deepvariant/README.md new file mode 100644 index 00000000..6f816c22 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/README.md @@ -0,0 +1,8 @@ +# DeepVariant subworkflow + +Usage: the input channel should contain tuples of four elements: `meta`, an alignment file in bam or +cram format, a corresponding index, and an intervals BED file (or `[]` when no regions are used). + +It is very important that the input channel's `meta` is unique for all the input elements, because the subworkflow does a join on `meta`. + +Please note the important configuration items listed in the `deepvariant` module's README file. It is required to use the configuration to specify the input "channels" (data types to extract from bam file) for `DEEPVARIANT_MAKEEXAMPLES`, and the model to run for `DEEPVARIANT_CALLVARIANTS`. The correct arguments for a specific model (data type) can be determined by manually using the `run_deepvariant` command from the Docker / Singularity image with the `--dry_run` option.
diff --git a/subworkflows/nf-core/deepvariant/main.nf b/subworkflows/nf-core/deepvariant/main.nf new file mode 100644 index 00000000..439cbc90 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/main.nf @@ -0,0 +1,46 @@ +include { DEEPVARIANT_MAKEEXAMPLES } from '../../../modules/nf-core/deepvariant/makeexamples/main' +include { DEEPVARIANT_CALLVARIANTS } from '../../../modules/nf-core/deepvariant/callvariants/main' +include { DEEPVARIANT_POSTPROCESSVARIANTS } from '../../../modules/nf-core/deepvariant/postprocessvariants/main' + +workflow DEEPVARIANT { + take: + ch_input // channel: [ val(meta), path(input), path(index), path(intervals)] + ch_fasta // channel: [ val(meta2), path(fasta) ] + ch_fai // channel: [ val(meta3), path(fai) ] + ch_gzi // channel: [ val(meta4), path(gzi) ] + ch_par_bed // channel: [ val(meta5), path(par_bed) ] + + main: + + DEEPVARIANT_MAKEEXAMPLES(ch_input, ch_fasta, ch_fai, ch_gzi, ch_par_bed) + + DEEPVARIANT_CALLVARIANTS(DEEPVARIANT_MAKEEXAMPLES.out.examples) + + // Input to postprocessing step needs both the gvcfs from MAKEEXAMPLES and the variant + // calls from CALLVARIANTS. Joining on meta, which is assumed to be unique.
+ ch_intervals = ch_input.map { meta, _input, _index, intervals -> [ meta, intervals ] } + + ch_postproc_input = DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.join( + DEEPVARIANT_MAKEEXAMPLES.out.gvcf, + failOnMismatch: true + ).join( + DEEPVARIANT_MAKEEXAMPLES.out.small_model_calls, + failOnMismatch: true + ).join( + ch_intervals, + failOnMismatch: true + ) + + DEEPVARIANT_POSTPROCESSVARIANTS( + ch_postproc_input, + ch_fasta, + ch_fai, + ch_gzi + ) + + emit: + vcf = DEEPVARIANT_POSTPROCESSVARIANTS.out.vcf + vcf_index = DEEPVARIANT_POSTPROCESSVARIANTS.out.vcf_index + gvcf = DEEPVARIANT_POSTPROCESSVARIANTS.out.gvcf + gvcf_index = DEEPVARIANT_POSTPROCESSVARIANTS.out.gvcf_index +} diff --git a/subworkflows/nf-core/deepvariant/meta.yml b/subworkflows/nf-core/deepvariant/meta.yml new file mode 100644 index 00000000..bd459a62 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/meta.yml @@ -0,0 +1,77 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: deepvariant +description: DeepVariant is an analysis pipeline that uses a deep neural network to call genetic variants from next-generation DNA sequencing data +keywords: + - variant calling + - machine learning + - neural network +components: + - deepvariant/makeexamples + - deepvariant/callvariants + - deepvariant/postprocessvariants +input: + - ch_input: + type: list + description: | + Input aligned reads in bam or cram format, with index, and optional intervals BED file + Structure: [ val(meta), path(bam_or_cram), path(bai_or_crai), path(intervals_bed) ] + - ch_fasta: + type: file + description: | + Reference genome + Structure: [ val(meta2), path(fasta) ] + - ch_fai: + type: file + description: | + Reference genome index in fai format + Structure: [ val(meta3), path(fai) ] + - ch_gzi: + type: file + description: | + Reference genome index in gzi format (either gzi or fai should be used) + Structure: [ val(meta4), path(gzi) ] + -
ch_par_bed: + type: file + description: | + bed file of pseudoautosomal regions (optional) + Structure: [ val(meta5), path(par_bed) ] + pattern: "*.bed" +output: + - vcf: + type: file + description: | + Variant calls + Structure: [ val(meta), path(vcf) ] + pattern: "*.vcf.gz" + - vcf_index: + type: file + description: | + Index for variant call file + Structure: [ val(meta), path(vcf_index) ] + pattern: "*.vcf.gz.{tbi,csi}" + - gvcf: + type: file + description: | + Variant call file with genomic coverage information + Structure: [ val(meta), path(gvcf) ] + pattern: "*.g.vcf.gz" + - gvcf_index: + type: file + description: | + Index for the GVCF. + Structure: [ val(meta), path(gvcf_index) ] + pattern: "*.g.vcf.gz.{tbi,csi}" + - versions: + type: file + description: | + File containing software versions + Structure: path(versions.yml) + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" diff --git a/subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf b/subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf new file mode 100644 index 00000000..83a16d55 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf @@ -0,0 +1,22 @@ +include { DEEPVARIANT_RUNDEEPVARIANT } from '../../../../modules/nf-core/deepvariant/rundeepvariant/main' +include { DEEPVARIANT } from '../main' + +workflow DEEPVARIANT_WORKFLOW_AND_PROCESS_EQUALITY_TESTER { + take: + ch_input // channel: [ val(meta), path(input), path(index), path(intervals)] + ch_fasta // channel: [ val(meta2), path(fasta) ] + ch_fai // channel: [ val(meta3), path(fai) ] + ch_gzi // channel: [ val(meta4), path(gzi) ] + ch_par_bed // channel: [ val(meta5), path(par_bed) ] + + main: + + DEEPVARIANT(ch_input, ch_fasta, ch_fai, ch_gzi, ch_par_bed) + DEEPVARIANT_RUNDEEPVARIANT(ch_input, ch_fasta, ch_fai, ch_gzi, ch_par_bed) + + emit: + wf_vcf =
DEEPVARIANT.out.vcf + pc_vcf = DEEPVARIANT_RUNDEEPVARIANT.out.vcf + wf_gvcf = DEEPVARIANT.out.gvcf + pc_gvcf = DEEPVARIANT_RUNDEEPVARIANT.out.gvcf +} diff --git a/subworkflows/nf-core/deepvariant/tests/disable-small-model.conf b/subworkflows/nf-core/deepvariant/tests/disable-small-model.conf new file mode 100644 index 00000000..eb1b53bc --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/disable-small-model.conf @@ -0,0 +1,8 @@ +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs"' + } + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + } +} diff --git a/subworkflows/nf-core/deepvariant/tests/equality.nf.test b/subworkflows/nf-core/deepvariant/tests/equality.nf.test new file mode 100644 index 00000000..c4a2276e --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/equality.nf.test @@ -0,0 +1,63 @@ + +nextflow_workflow { + + name "Compare subworkflow DEEPVARIANT to the process DEEPVARIANT_RUNDEEPVARIANT" + script "./deepvariant-workflow-and-process-equality-tester.nf" + config "./nextflow.config" + workflow "DEEPVARIANT_WORKFLOW_AND_PROCESS_EQUALITY_TESTER" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/deepvariant" + + tag "deepvariant" + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant/postprocessvariants" + tag "deepvariant/rundeepvariant" + + test("ensure that the subworkflow and DEEPVARIANT_RUNDEEPVARIANT have the same output") { + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + 
[ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path(workflow.out.wf_vcf[0][1]).vcf.variantsMD5 == path(workflow.out.pc_vcf[0][1]).vcf.variantsMD5 }, + { assert path(workflow.out.wf_gvcf[0][1]).vcf.variantsMD5 == path(workflow.out.pc_gvcf[0][1]).vcf.variantsMD5 }, + ) + } + } +} diff --git a/subworkflows/nf-core/deepvariant/tests/main.nf.test b/subworkflows/nf-core/deepvariant/tests/main.nf.test new file mode 100644 index 00000000..d2451980 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/main.nf.test @@ -0,0 +1,152 @@ +nextflow_workflow { + + name "Test Subworkflow DEEPVARIANT" + script "../main.nf" + config "./nextflow.config" + workflow "DEEPVARIANT" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/deepvariant" + + tag "deepvariant" + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant/postprocessvariants" + + test("homo_sapiens - two inputs - bam - fasta - fai") { + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ]) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("homo_sapiens - different samples and regions - cram - fasta - fai") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + 
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("homo_sapiens - disable small model - cram - fasta - fai") { + + config "./disable-small-model.conf" + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git 
a/subworkflows/nf-core/deepvariant/tests/main.nf.test.snap b/subworkflows/nf-core/deepvariant/tests/main.nf.test.snap new file mode 100644 index 00000000..2d14299a --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/main.nf.test.snap @@ -0,0 +1,419 @@ +{ + "homo_sapiens - disable small model - cram - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,0c57956b2f5a0cff8d09a19790ef94f6" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,5f3d98908d46297c7a658654d5bb3015" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,9ae649fed4de493a027697b339bfab36" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,d6a114149024aa8cd74dda2f1c559f5b" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,021f94de713efa7c83d0547f81412dbf" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,86f9c844a90351483c715e7bcc604841" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,3c690275c3d0b55bacb9469199b4d6d8" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,c65395b29f520cf2af04f211f9be2b36" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,021f94de713efa7c83d0547f81412dbf" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,86f9c844a90351483c715e7bcc604841" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,3c690275c3d0b55bacb9469199b4d6d8" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,c65395b29f520cf2af04f211f9be2b36" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,0c57956b2f5a0cff8d09a19790ef94f6" + ], + [ + { + "id": "test2", + "single_end": false + }, + 
"test2.vcf.gz:md5,5f3d98908d46297c7a658654d5bb3015" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,9ae649fed4de493a027697b339bfab36" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,d6a114149024aa8cd74dda2f1c559f5b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-29T00:09:09.621357638" + }, + "homo_sapiens - different samples and regions - cram - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,3176f86df96e50687db733c94d9c6689" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,ed7ca1a16bcff42bced0be77ee70662e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,875b521c835441277a527d41c950e4f5" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,ca6f9ca8d50d339f5d65e4ec4e9a6ea6" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,875b521c835441277a527d41c950e4f5" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,ca6f9ca8d50d339f5d65e4ec4e9a6ea6" + ] + ], + "vcf": [ + [ + { + 
"id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,3176f86df96e50687db733c94d9c6689" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,ed7ca1a16bcff42bced0be77ee70662e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-29T00:05:25.205895104" + }, + "homo_sapiens - two inputs - bam - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + 
"id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-29T00:01:12.430387646" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/deepvariant/tests/nextflow.config b/subworkflows/nf-core/deepvariant/tests/nextflow.config new file mode 100644 index 00000000..ad76d4ec --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + } + + // This configures RUNDEEPVARIANT, which is used as a reference for the correct test output + withName: DEEPVARIANT_RUNDEEPVARIANT { + ext.args = '--model_type=WGS ' + ext.prefix = { "${meta.id}_out" } + } +} From 12869b048ae432a3980fdec8dd21d1c3ef1df1ea Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Thu, 12 Mar 2026 14:38:20 +0100 Subject: [PATCH 02/36] add deepsomatic --- modules.json | 5 + modules/nf-core/deepsomatic/main.nf | 71 +++++++++ modules/nf-core/deepsomatic/meta.yml | 144 ++++++++++++++++++ .../nf-core/deepsomatic/tests/main.nf.test | 59 +++++++ 
.../deepsomatic/tests/main.nf.test.snap | 20 +++ .../nf-core/deepsomatic/tests/nextflow.config | 6 + 6 files changed, 305 insertions(+) create mode 100644 modules/nf-core/deepsomatic/main.nf create mode 100644 modules/nf-core/deepsomatic/meta.yml create mode 100644 modules/nf-core/deepsomatic/tests/main.nf.test create mode 100644 modules/nf-core/deepsomatic/tests/main.nf.test.snap create mode 100644 modules/nf-core/deepsomatic/tests/nextflow.config diff --git a/modules.json b/modules.json index 084a0f43..2d23478c 100644 --- a/modules.json +++ b/modules.json @@ -27,6 +27,11 @@ "installed_by": ["modules", "vcf_gather_bcftools"], "patch": "modules/nf-core/bcftools/sort/bcftools-sort.diff" }, + "deepsomatic": { + "branch": "master", + "git_sha": "ddb0d667cf6cdee3bab9497241de4bbf6b88d8cc", + "installed_by": ["modules"] + }, "deepvariant/callvariants": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", diff --git a/modules/nf-core/deepsomatic/main.nf b/modules/nf-core/deepsomatic/main.nf new file mode 100644 index 00000000..7a17793e --- /dev/null +++ b/modules/nf-core/deepsomatic/main.nf @@ -0,0 +1,71 @@ +process DEEPSOMATIC { + tag "$meta.id" + label 'process_high' + + container "docker.io/google/deepsomatic:1.7.0" + input: + tuple val(meta), path(input_normal), path(index_normal), path(input_tumor), path(index_tumor) + tuple val(meta2), path(intervals) + tuple val(meta3), path(fasta) + tuple val(meta4), path(fai) + tuple val(meta5), path(gzi) + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: vcf_tbi + tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf + tuple val(meta), path("${prefix}.g.vcf.gz.tbi"), emit: gvcf_tbi + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { 
+ error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def regions = intervals ? "--regions=${intervals}" : "" + def VERSION = '1.7.0' + + """ + run_deepsomatic \\ + --ref=${fasta} \\ + --reads_normal=${input_normal} \\ + --reads_tumor=${input_tumor} \\ + --output_vcf=${prefix}.vcf.gz \\ + --output_gvcf=${prefix}.g.vcf.gz \\ + --sample_name_tumor="tumor" \\ + --sample_name_normal="normal" \\ + ${args} \\ + ${regions} \\ + --intermediate_results_dir=tmp \\ + --num_shards=${task.cpus} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepsomatic: $VERSION + END_VERSIONS + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + prefix = task.ext.prefix ?: "${meta.id}" + def VERSION = '1.7.0' + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + deepsomatic: $VERSION + END_VERSIONS + """ +} diff --git a/modules/nf-core/deepsomatic/meta.yml b/modules/nf-core/deepsomatic/meta.yml new file mode 100644 index 00000000..0d8afb5b --- /dev/null +++ b/modules/nf-core/deepsomatic/meta.yml @@ -0,0 +1,144 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "deepsomatic" +description: DeepSomatic is an extension of deep learning-based variant caller DeepVariant + that takes aligned reads (in BAM or CRAM format) from tumor and normal data, produces + pileup image tensors from them, classifies each tensor using a convolutional neural + network, and finally reports somatic variants in a standard VCF or gVCF file. 
+keywords: + - variant calling + - machine learning + - neural network +tools: + - "deepsomatic": + description: "" + homepage: "https://github.com/google/deepsomatic" + documentation: "https://github.com/google/deepsomatic" + tool_dev_url: "https://github.com/google/deepsomatic" + doi: "10.1101/2024.08.16.608331" + licence: ["BSD-3-clause"] + identifier: "biotools:deepsomatic" + +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input_normal: + type: file + description: BAM/CRAM file + pattern: "*.bam/cram" + ontologies: [] + - index_normal: + type: file + description: Index of BAM/CRAM file + pattern: "*.bai/crai" + ontologies: [] + - input_tumor: + type: file + description: BAM/CRAM file + pattern: "*.bam/cram" + ontologies: [] + - index_tumor: + type: file + description: Index of BAM/CRAM file + pattern: "*.bai/crai" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - intervals: + type: file + description: file containing intervals + ontologies: [] + - - meta3: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fasta: + type: file + description: The reference fasta file + pattern: "*.fasta" + ontologies: [] + - - meta4: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - fai: + type: file + description: Index of reference fasta file + pattern: "*.fai" + ontologies: [] + - - meta5: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - gzi: + type: file + description: GZI index of reference fasta file + pattern: "*.gzi" + + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
`[ id:'sample1', single_end:false ]` + - ${prefix}.vcf.gz: + type: file + description: Compressed VCF file + pattern: "*.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + vcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}.vcf.gz.tbi: + type: file + description: Index of compressed VCF file + pattern: "*.vcf.gz.tbi" + ontologies: [] + gvcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}.g.vcf.gz: + type: file + description: Compressed GVCF file + pattern: "*.g.vcf.gz" + ontologies: + - edam: http://edamontology.org/format_3989 # GZIP format + gvcf_tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1', single_end:false ]` + - ${prefix}.g.vcf.gz.tbi: + type: file + description: Index of compressed Genotyped VCF file + pattern: "*.g.vcf.gz.tbi" + ontologies: [] + versions: + - versions.yml: + type: file + description: File containing software versions + pattern: "versions.yml" + ontologies: + - edam: http://edamontology.org/format_3750 # YAML +authors: + - "@vaxyzek" +maintainers: + - "@vaxyzek" diff --git a/modules/nf-core/deepsomatic/tests/main.nf.test b/modules/nf-core/deepsomatic/tests/main.nf.test new file mode 100644 index 00000000..eaa5f8fe --- /dev/null +++ b/modules/nf-core/deepsomatic/tests/main.nf.test @@ -0,0 +1,59 @@ +nextflow_process { + + name "Test Process DEEPSOMATIC" + script "../main.nf" + process "DEEPSOMATIC" + + tag "modules" + tag "modules_nfcore" + tag "deepsomatic" + + test("tumor_normal_pair") { + config './nextflow.config' + + when { + process { + """ + input[0] = [ + [ id:'tumor_vs_normal' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) + ] + input[1] = [ + [ id:'intervals' ], + [] + ] + input[2] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) + ] + input[3] = [ + [ id:'genome' ], + file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) + ] + input[4] = [ + [ id: 'gzi' ], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { + assert snapshot( + process.out.vcf.collect { file(it[1]).getName() }, + process.out.tbi.collect { file(it[1]).getName() }, + process.out.versions, + ).match() + } + ) + } + + } + +} diff --git a/modules/nf-core/deepsomatic/tests/main.nf.test.snap b/modules/nf-core/deepsomatic/tests/main.nf.test.snap new file mode 100644 index 00000000..1a7886ec --- /dev/null +++ b/modules/nf-core/deepsomatic/tests/main.nf.test.snap @@ -0,0 +1,20 @@ +{ + "tumor_normal_pair": { + "content": [ + [ + "tumor_vs_normal_out.vcf.gz" + ], + [ + + ], + [ + "versions.yml:md5,d64cbd049771dd1a8d0885499ea16f11" + ] + ], + "meta": { + "nf-test": "0.9.0", + "nextflow": "24.04.3" + }, + "timestamp": "2024-10-22T21:52:00.932502018" + } +} \ No newline at end of file diff --git a/modules/nf-core/deepsomatic/tests/nextflow.config b/modules/nf-core/deepsomatic/tests/nextflow.config new file mode 100644 index 00000000..fff76401 --- /dev/null +++ b/modules/nf-core/deepsomatic/tests/nextflow.config @@ -0,0 +1,6 @@ +process { + withName: DEEPSOMATIC { + ext.args = 
"--regions='chr21:6110000-6120000'" + ext.prefix = { "${meta.id}_out" } + } +} From 388d18075442cf8cbdd712f18d3409f078a35f03 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Fri, 13 Mar 2026 11:39:36 +0100 Subject: [PATCH 03/36] get deepvariant running --- .gitignore | 1 + conf/modules.config | 30 +++++++++++++++++++++ conf/test.config | 13 +++++++++ subworkflows/local/tumor_normal_happhase.nf | 17 ++++++++++++ subworkflows/local/tumor_only_happhase.nf | 16 +++++++++++ subworkflows/nf-core/deepvariant/main.nf | 2 +- 6 files changed, 78 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 9e307203..8be7152c 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ testing* null/ .nf-test .nf-test.log +out/ \ No newline at end of file diff --git a/conf/modules.config b/conf/modules.config index ce3ec749..ce03c41c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -87,6 +87,36 @@ process { ] } + withName: '.*DEEPVARIANT_MAKEEXAMPLES' { + ext.args = { + meta.platform == 'pb' + ? '--channel_list "BASE_CHANNELS,haplotype,base_6ma" --alt_aligned_pileup "diff_channels" --pileup_image_width "147"' + : '--channel_list "BASE_CHANNELS,haplotype" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' + } + publishDir = [ + enabled: false + ] + } + + withName: '.*DEEPVARIANT_POSTPROCESSVARIANTS' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/variants/deepvariant" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + + withName: '.*DEEPVARIANT_CALLVARIANTS' { + ext.args = { + meta.platform == 'pb' ? 
("--checkpoint '/opt/models/pacbio' ") : ("--checkpoint '/opt/models/ont_r104'") + } + publishDir = [ + enabled : false + ] + } + + + withName: '.*:UNZIP_.*' { publishDir = [ enabled: false diff --git a/conf/test.config b/conf/test.config index 416ed610..8b9b08f4 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,6 +18,19 @@ process { time: '1.h' ] } + + withName: '.*DEEPVARIANT_MAKEEXAMPLES' { + ext.args = { + "--regions 'chr19'" + } + } + + withName: '.*DEEPVARIANT_POSTPROCESSVARIANTS' { + ext.args = { + "--regions 'chr19'" + } + } + } params { diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf index 6abe9941..2ccdbf5d 100644 --- a/subworkflows/local/tumor_normal_happhase.nf +++ b/subworkflows/local/tumor_normal_happhase.nf @@ -5,6 +5,8 @@ include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index include { CLAIRS } from '../../modules/local/clairs/main.nf' include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat' include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort' +include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' + workflow TUMOR_NORMAL_HAPPHASE { take: @@ -105,6 +107,21 @@ workflow TUMOR_NORMAL_HAPPHASE { fasta, fai ) + + normal_bams + .map {meta, bam, bai, _model, _platform -> + def intervals = [] + return [meta, bam, bai, intervals] + } + .set{deepvar_normal_bams} + + DEEPVARIANT ( + deepvar_normal_bams, + fasta, + fai, + [[:],[]], + [[:],[]] + ) // Add germline vcf to normal bams // remove clair3 model information diff --git a/subworkflows/local/tumor_only_happhase.nf b/subworkflows/local/tumor_only_happhase.nf index 2e09f750..e051541e 100644 --- a/subworkflows/local/tumor_only_happhase.nf +++ b/subworkflows/local/tumor_only_happhase.nf @@ -3,6 +3,7 @@ include { VCFSPLIT } from '../../modules/local/vcfsplit/main.nf include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main' include { LONGPHASE_HAPLOTAG } from 
'../../modules/nf-core/longphase/haplotag/main.nf' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' +include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' workflow TUMOR_ONLY_HAPPHASE { @@ -28,6 +29,21 @@ workflow TUMOR_ONLY_HAPPHASE { } .set{ tumor_bams } + tumor_bams + .map { meta, bam, bai, _clairSTO_model -> + def intervals = [] + return [meta,bam,bai, intervals] + } + .set{tumor_bams_deepvar} + + DEEPVARIANT ( + tumor_bams_deepvar, + fasta, + fai, + [[:],[]], + [[:],[]] + ) + // // MODULE: CLAIRSTO // diff --git a/subworkflows/nf-core/deepvariant/main.nf b/subworkflows/nf-core/deepvariant/main.nf index 439cbc90..0b358ff7 100644 --- a/subworkflows/nf-core/deepvariant/main.nf +++ b/subworkflows/nf-core/deepvariant/main.nf @@ -6,7 +6,7 @@ workflow DEEPVARIANT { take: ch_input // channel: [ val(meta), path(input), path(index), path(intervals)] ch_fasta // channel: [ val(meta2), path(fasta) ] - ch_fai // channel: [ val(meta3), path(fail) ] + ch_fai // channel: [ val(meta3), path(fai) ] ch_gzi // channel: [ val(meta4), path(gzi) ] ch_par_bed // channel: [ val(meta5), path(par_bed) ] From af3a58e2122b3e48bd0453dd1ec42b4e87c86da6 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Fri, 13 Mar 2026 15:12:36 +0100 Subject: [PATCH 04/36] add deep somatic and split processes for more efficent gpu requests --- conf/modules.config | 40 +++++ conf/test.config | 10 ++ .../local/deepsomatic/callvariants/main.nf | 49 ++++++ .../local/deepsomatic/makeexamples/main.nf | 58 +++++++ .../deepsomatic/postprocessvariants/main.nf | 87 +++++++++++ modules/nf-core/deepsomatic/main.nf | 71 --------- modules/nf-core/deepsomatic/meta.yml | 144 ------------------ .../nf-core/deepsomatic/tests/main.nf.test | 59 ------- .../deepsomatic/tests/main.nf.test.snap | 20 --- .../nf-core/deepsomatic/tests/nextflow.config | 6 - subworkflows/local/deepsomatic.nf | 42 +++++ subworkflows/local/tumor_normal_happhase.nf | 16 ++ 
subworkflows/local/tumor_only_happhase.nf | 18 +++ 13 files changed, 320 insertions(+), 300 deletions(-) create mode 100644 modules/local/deepsomatic/callvariants/main.nf create mode 100644 modules/local/deepsomatic/makeexamples/main.nf create mode 100644 modules/local/deepsomatic/postprocessvariants/main.nf delete mode 100644 modules/nf-core/deepsomatic/main.nf delete mode 100644 modules/nf-core/deepsomatic/meta.yml delete mode 100644 modules/nf-core/deepsomatic/tests/main.nf.test delete mode 100644 modules/nf-core/deepsomatic/tests/main.nf.test.snap delete mode 100644 modules/nf-core/deepsomatic/tests/nextflow.config create mode 100644 subworkflows/local/deepsomatic.nf diff --git a/conf/modules.config b/conf/modules.config index ce03c41c..697c1c2c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -115,6 +115,46 @@ process { ] } + withName: '.*DEEPSOMATIC_MAKEEXAMPLES' { + ext.args = { + meta.platform == 'pb' + ? meta.paired_data + ? '--channel_list "BASE_CHANNELS,haplotype" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' + : '--channel_list "BASE_CHANNELS,haplotype,allele_frequency" --alt_aligned_pileup "diff_channels" --pileup_image_width "99" --population_vcfs "/opt/models/deepsomatic/pons/AF_pacbio_PON_CoLoRSdb.GRCh38.AF0.05.vcf.gz"' + : meta.paired_data + ? '--channel_list "BASE_CHANNELS,haplotype" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' + : '--channel_list "BASE_CHANNELS,haplotype,allele_frequency" --alt_aligned_pileup "diff_channels" --pileup_image_width "99" --population_vcfs "/opt/models/deepsomatic/pons/ON_dbsnp138_gnomad_ILMN1000g_pon.vcf.gz"' + } + publishDir = [ + enabled: false + ] + } + + withName: '.*DEEPSOMATIC_POSTPROCESSVARIANTS' { + ext.args = { + '--process_somatic=true' + } + publishDir = [ + path: { "${params.outdir}/${meta.id}/variants/deepsomatic" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + ] + } + + withName: '.*DEEPSOMATIC_CALLVARIANTS' { + ext.args = { + meta.platform == 'pb' + ? (meta.paired_data + ? "--checkpoint '/opt/models/deepsomatic/pacbio'" + : "--checkpoint '/opt/models/deepsomatic/pacbio_tumor_only'" ) + : (meta.paired_data + ? "--checkpoint '/opt/models/deepsomatic/ont'" + : "--checkpoint '/opt/models/deepsomatic/ont_tumor_only'") + } + publishDir = [ + enabled : false + ] + } withName: '.*:UNZIP_.*' { diff --git a/conf/test.config b/conf/test.config index 8b9b08f4..96e61b28 100644 --- a/conf/test.config +++ b/conf/test.config @@ -30,6 +30,16 @@ process { "--regions 'chr19'" } } + withName: '.*DEEPSOMATIC_MAKEEXAMPLES' { + ext.args = { + "--regions 'chr19'" + } + } + withName: '.*DEEPSOMATIC_POSTPROCESSVARIANTS' { + ext.args = { + "--regions 'chr19'" + } + } } diff --git a/modules/local/deepsomatic/callvariants/main.nf b/modules/local/deepsomatic/callvariants/main.nf new file mode 100644 index 00000000..8491e664 --- /dev/null +++ b/modules/local/deepsomatic/callvariants/main.nf @@ -0,0 +1,49 @@ +process DEEPSOMATIC_CALLVARIANTS { + tag "$meta.id" + label 'process_high' + + //Conda is not supported at the moment + container "docker.io/google/deepsomatic:1.7.0" + + input: + tuple val(meta), path(make_examples_tfrecords) + + output: + tuple val(meta), path("${prefix}.call-*-of-*.tfrecord.gz"), emit: call_variants_tfrecords + tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." 
+ } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def matcher = make_examples_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + make_examples_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def examples_tfrecord_name = matcher[0][1] + def shardCount = matcher[0][2] + // Reconstruct the logical name - ${tfrecord_name}@.gz + def examples_tfrecords_logical_name = "${examples_tfrecord_name}@${shardCount}.gz" + + """ + /opt/deepvariant/bin/call_variants \\ + ${args} \\ + --outfile "${prefix}.call.tfrecord.gz" \\ + --examples "${examples_tfrecords_logical_name}" + + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.call-00000-of-00001.tfrecord.gz + + """ +} diff --git a/modules/local/deepsomatic/makeexamples/main.nf b/modules/local/deepsomatic/makeexamples/main.nf new file mode 100644 index 00000000..f1b148b4 --- /dev/null +++ b/modules/local/deepsomatic/makeexamples/main.nf @@ -0,0 +1,58 @@ +process DEEPSOMATIC_MAKEEXAMPLES { + tag "$meta.id" + label 'process_high' + + //Conda is not supported at the moment + container "docker.io/google/deepsomatic:1.7.0" + + input: + tuple val(meta), path(normal_input), path(normal_index), path(tumor_input), path(tumor_index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + + output: + tuple val(meta), path("${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}"), emit: examples + tuple val(meta), path("${prefix}.gvcf.tfrecord-*-of-*.gz"), emit: gvcf + tuple val(meta), path("${prefix}_call_variant_outputs.tfrecord-*-of-*.gz", arity: "0..*"), emit: small_model_calls + tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile 
mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def normalReadsArg = (normal_input?.toString() && normal_input.toString() != '[]') ? "--reads_normal \"${normal_input}\"" : "" + def normalSampleArg = (normal_input?.toString() && normal_input.toString() != '[]') ? "--sample_name_normal \"${prefix}_normal\"" : "" + + """ + seq 0 ${task.cpus - 1} | parallel -q --halt 2 --line-buffer /opt/deepvariant/bin/make_examples_somatic \\ + --mode calling \\ + --ref "${fasta}" \\ + --reads_tumor "${tumor_input}" \\ + ${normalReadsArg} \\ + --sample_name_tumor "${prefix}_tumor" \\ + ${normalSampleArg} \\ + --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ + --gvcf "./${prefix}.gvcf.tfrecord@${task.cpus}.gz" \\ + ${args} \\ + --task {} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + printf -v SHARD_COUNT "%05d" ${task.cpus} + for i in \$( seq -f "%05g" 0 ${task.cpus-1} ) + do + echo "" | gzip > ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.gz + touch ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.gz.example_info.json + echo "" | gzip > ${prefix}.gvcf.tfrecord-\$i-of-\$SHARD_COUNT.gz + done + """ +} diff --git a/modules/local/deepsomatic/postprocessvariants/main.nf b/modules/local/deepsomatic/postprocessvariants/main.nf new file mode 100644 index 00000000..a192b57f --- /dev/null +++ b/modules/local/deepsomatic/postprocessvariants/main.nf @@ -0,0 +1,87 @@ +process DEEPSOMATIC_POSTPROCESSVARIANTS { + tag "$meta.id" + label 'process_medium' + + //Conda is not supported at the moment + container "docker.io/google/deepsomatic:1.7.0" + + input: + tuple val(meta), path(variant_calls_tfrecord_files), path(gvcf_tfrecords), val(small_model_calls), val(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple
val(meta4), path(gzi) + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.{tbi,csi}") , emit: vcf_index + tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf + tuple val(meta), path("${prefix}.g.vcf.gz.{tbi,csi}") , emit: gvcf_index + tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def regions = intervals ? "--regions ${intervals}" : "" + def variant_calls_tfrecord_name = variant_calls_tfrecord_files[0].name.replaceFirst(/-\d{5}-of-\d{5}/, "") + + def gvcf_matcher = gvcf_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!gvcf_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + gvcf_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def gvcf_tfrecord_name = gvcf_matcher[0][1] + def gvcf_shardCount = gvcf_matcher[0][2] + // Reconstruct the logical name - ${tfrecord_name}.examples.tfrecord@${task.cpus}.gz + def gvcf_tfrecords_logical_name = "${gvcf_tfrecord_name}@${gvcf_shardCount}.gz" + + // The following block determines whether the small model was used, and if so, adds the variant calls from it + // to the argument --small_model_cvo_records. 
+ def small_model_arg = "" + if (small_model_calls && small_model_calls.size() > 0) { + def small_model_matcher = (small_model_calls[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/) + if (!small_model_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + small_model_calls[0].baseName + "' doesn't match the expected pattern") + } + def small_model_tfrecord_name = small_model_matcher[0][1] + def small_model_shardCount = small_model_matcher[0][2] + // Reconstruct the logical name. Example: test_call_variant_outputs.examples.tfrecord@12.gz + def small_model_tfrecords_logical_name = "${small_model_tfrecord_name}@${small_model_shardCount}.gz" + small_model_arg = "--small_model_cvo_records ${small_model_tfrecords_logical_name}" + } + + """ + /opt/deepvariant/bin/postprocess_variants \\ + ${args} \\ + --ref "${fasta}" \\ + --infile "${variant_calls_tfrecord_name}" \\ + --outfile "${prefix}.vcf.gz" \\ + --process_somatic=true \\ + --nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\ + --gvcf_outfile "${prefix}.g.vcf.gz" \\ + ${regions} \\ + ${small_model_arg} \\ + --cpus $task.cpus + + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead."
+ } + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + """ +} diff --git a/modules/nf-core/deepsomatic/main.nf b/modules/nf-core/deepsomatic/main.nf deleted file mode 100644 index 7a17793e..00000000 --- a/modules/nf-core/deepsomatic/main.nf +++ /dev/null @@ -1,71 +0,0 @@ -process DEEPSOMATIC { - tag "$meta.id" - label 'process_high' - - container "docker.io/google/deepsomatic:1.7.0" - input: - tuple val(meta), path(input_normal), path(index_normal), path(input_tumor), path(index_tumor) - tuple val(meta2), path(intervals) - tuple val(meta3), path(fasta) - tuple val(meta4), path(fai) - tuple val(meta5), path(gzi) - - output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf - tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: vcf_tbi - tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf - tuple val(meta), path("${prefix}.g.vcf.gz.tbi"), emit: gvcf_tbi - path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when - - script: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." - } - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" - def regions = intervals ? 
"--regions=${intervals}" : "" - def VERSION = '1.7.0' - - """ - run_deepsomatic \\ - --ref=${fasta} \\ - --reads_normal=${input_normal} \\ - --reads_tumor=${input_tumor} \\ - --output_vcf=${prefix}.vcf.gz \\ - --output_gvcf=${prefix}.g.vcf.gz \\ - --sample_name_tumor="tumor" \\ - --sample_name_normal="normal" \\ - ${args} \\ - ${regions} \\ - --intermediate_results_dir=tmp \\ - --num_shards=${task.cpus} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - deepsomatic: $VERSION - END_VERSIONS - """ - - stub: - // Exit if running this module with -profile conda / -profile mamba - if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { - error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." - } - prefix = task.ext.prefix ?: "${meta.id}" - def VERSION = '1.7.0' - """ - echo "" | gzip > ${prefix}.vcf.gz - touch ${prefix}.vcf.gz.tbi - echo "" | gzip > ${prefix}.g.vcf.gz - touch ${prefix}.g.vcf.gz.tbi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - deepsomatic: $VERSION - END_VERSIONS - """ -} diff --git a/modules/nf-core/deepsomatic/meta.yml b/modules/nf-core/deepsomatic/meta.yml deleted file mode 100644 index 0d8afb5b..00000000 --- a/modules/nf-core/deepsomatic/meta.yml +++ /dev/null @@ -1,144 +0,0 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json -name: "deepsomatic" -description: DeepSomatic is an extension of deep learning-based variant caller DeepVariant - that takes aligned reads (in BAM or CRAM format) from tumor and normal data, produces - pileup image tensors from them, classifies each tensor using a convolutional neural - network, and finally reports somatic variants in a standard VCF or gVCF file. 
-keywords: - - variant calling - - machine learning - - neural network -tools: - - "deepsomatic": - description: "" - homepage: "https://github.com/google/deepsomatic" - documentation: "https://github.com/google/deepsomatic" - tool_dev_url: "https://github.com/google/deepsomatic" - doi: "10.1101/2024.08.16.608331" - licence: ["BSD-3-clause"] - identifier: "biotools:deepsomatic" - -input: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - input_normal: - type: file - description: BAM/CRAM file - pattern: "*.bam/cram" - ontologies: [] - - index_normal: - type: file - description: Index of BAM/CRAM file - pattern: "*.bai/crai" - ontologies: [] - - input_tumor: - type: file - description: BAM/CRAM file - pattern: "*.bam/cram" - ontologies: [] - - index_tumor: - type: file - description: Index of BAM/CRAM file - pattern: "*.bai/crai" - ontologies: [] - - - meta2: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - intervals: - type: file - description: file containing intervals - ontologies: [] - - - meta3: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fasta: - type: file - description: The reference fasta file - pattern: "*.fasta" - ontologies: [] - - - meta4: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - fai: - type: file - description: Index of reference fasta file - pattern: "*.fai" - ontologies: [] - - - meta5: - type: map - description: | - Groovy Map containing sample information - e.g. [ id:'test', single_end:false ] - - gzi: - type: file - description: GZI index of reference fasta file - pattern: "*.gzi" - - ontologies: [] -output: - vcf: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. 
`[ id:'sample1', single_end:false ]` - - ${prefix}.vcf.gz: - type: file - description: Compressed VCF file - pattern: "*.vcf.gz" - ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format - vcf_tbi: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - ${prefix}.vcf.gz.tbi: - type: file - description: Index of compressed VCF file - pattern: "*.vcf.gz.tbi" - ontologies: [] - gvcf: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - ${prefix}.g.vcf.gz: - type: file - description: Compressed GVCF file - pattern: "*.g.vcf.gz" - ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format - gvcf_tbi: - - - meta: - type: map - description: | - Groovy Map containing sample information - e.g. `[ id:'sample1', single_end:false ]` - - ${prefix}.g.vcf.gz.tbi: - type: file - description: Index of compressed Genotyped VCF file - pattern: "*.g.vcf.gz.tbi" - ontologies: [] - versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML -authors: - - "@vaxyzek" -maintainers: - - "@vaxyzek" diff --git a/modules/nf-core/deepsomatic/tests/main.nf.test b/modules/nf-core/deepsomatic/tests/main.nf.test deleted file mode 100644 index eaa5f8fe..00000000 --- a/modules/nf-core/deepsomatic/tests/main.nf.test +++ /dev/null @@ -1,59 +0,0 @@ -nextflow_process { - - name "Test Process DEEPSOMATIC" - script "../main.nf" - process "DEEPSOMATIC" - - tag "modules" - tag "modules_nfcore" - tag "deepsomatic" - - test("tumor_normal_pair") { - config './nextflow.config' - - when { - process { - """ - input[0] = [ - [ id:'tumor_vs_normal' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam', checkIfExists: true), - 
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam', checkIfExists: true), - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/test2.paired_end.recalibrated.sorted.bam.bai', checkIfExists: true) - ] - input[1] = [ - [ id:'intervals' ], - [] - ] - input[2] = [ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta', checkIfExists: true) - ] - input[3] = [ - [ id:'genome' ], - file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/chr21/sequence/genome.fasta.fai', checkIfExists: true) - ] - input[4] = [ - [ id: 'gzi' ], - [] - ] - """ - } - } - - then { - assertAll( - { assert process.success }, - { - assert snapshot( - process.out.vcf.collect { file(it[1]).getName() }, - process.out.tbi.collect { file(it[1]).getName() }, - process.out.versions, - ).match() - } - ) - } - - } - -} diff --git a/modules/nf-core/deepsomatic/tests/main.nf.test.snap b/modules/nf-core/deepsomatic/tests/main.nf.test.snap deleted file mode 100644 index 1a7886ec..00000000 --- a/modules/nf-core/deepsomatic/tests/main.nf.test.snap +++ /dev/null @@ -1,20 +0,0 @@ -{ - "tumor_normal_pair": { - "content": [ - [ - "tumor_vs_normal_out.vcf.gz" - ], - [ - - ], - [ - "versions.yml:md5,d64cbd049771dd1a8d0885499ea16f11" - ] - ], - "meta": { - "nf-test": "0.9.0", - "nextflow": "24.04.3" - }, - "timestamp": "2024-10-22T21:52:00.932502018" - } -} \ No newline at end of file diff --git a/modules/nf-core/deepsomatic/tests/nextflow.config b/modules/nf-core/deepsomatic/tests/nextflow.config deleted file mode 100644 index fff76401..00000000 --- a/modules/nf-core/deepsomatic/tests/nextflow.config +++ /dev/null @@ -1,6 +0,0 @@ -process { - withName: DEEPSOMATIC { - ext.args = 
"--regions='chr21:6110000-6120000'" - ext.prefix = { "${meta.id}_out" } - } -} diff --git a/subworkflows/local/deepsomatic.nf b/subworkflows/local/deepsomatic.nf new file mode 100644 index 00000000..920795ad --- /dev/null +++ b/subworkflows/local/deepsomatic.nf @@ -0,0 +1,42 @@ +include { DEEPSOMATIC_MAKEEXAMPLES } from '../../modules/local/deepsomatic/makeexamples/main' +include { DEEPSOMATIC_CALLVARIANTS } from '../../modules/local/deepsomatic/callvariants/main' +include { DEEPSOMATIC_POSTPROCESSVARIANTS } from '../../modules/local/deepsomatic/postprocessvariants/main' + +workflow DEEPSOMATIC { + take: + ch_input // channel: [ val(meta), path(normal), path(normal_index), path(tumor), path(tumor_index)] + ch_intervals + ch_fasta // channel: [ val(meta2), path(fasta) ] + ch_fai // channel: [ val(meta3), path(fai) ] + ch_gzi // channel: [ val(meta4), path(gzi) ] + + main: + + DEEPSOMATIC_MAKEEXAMPLES(ch_input, ch_fasta, ch_fai, ch_gzi) + + DEEPSOMATIC_CALLVARIANTS(DEEPSOMATIC_MAKEEXAMPLES.out.examples) + + // Input to postprocessing step needs both the gvcfs from MAKEEXAMPLES and the variant + // calls from CALLVARIANTS. Joining on meta, which is assumed to be unique. 
+ + + ch_postproc_input = DEEPSOMATIC_CALLVARIANTS.out.call_variants_tfrecords.join( + DEEPSOMATIC_MAKEEXAMPLES.out.gvcf, + failOnMismatch: true + ).map { meta, call_tfrecord, gvcf_tfrecords -> + [meta, call_tfrecord, gvcf_tfrecords, [], []] + } + + DEEPSOMATIC_POSTPROCESSVARIANTS( + ch_postproc_input, + ch_fasta, + ch_fai, + ch_gzi + ) + + emit: + vcf = DEEPSOMATIC_POSTPROCESSVARIANTS.out.vcf + vcf_index = DEEPSOMATIC_POSTPROCESSVARIANTS.out.vcf_index + gvcf = DEEPSOMATIC_POSTPROCESSVARIANTS.out.gvcf + gvcf_index = DEEPSOMATIC_POSTPROCESSVARIANTS.out.gvcf_index +} diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf index 2ccdbf5d..9a0698ee 100644 --- a/subworkflows/local/tumor_normal_happhase.nf +++ b/subworkflows/local/tumor_normal_happhase.nf @@ -6,6 +6,7 @@ include { CLAIRS } from '../../modules/local/clairs/main.nf' include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat' include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort' include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' +include { DEEPSOMATIC } from '../../subworkflows/local/deepsomatic.nf' workflow TUMOR_NORMAL_HAPPHASE { @@ -284,6 +285,21 @@ workflow TUMOR_NORMAL_HAPPHASE { return[meta , tumor_bam, tumor_bai, normal_bam, normal_bai, meta.clairS_model] } .set { clairs_input } + + tumor_normal_severus + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf, _tbi -> + return [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + } + .set{ deepsomatic_input } + + DEEPSOMATIC ( + deepsomatic_input, + [[:],[]], + fasta, + fai, + [[:],[]] + ) + // // MODULE: CLAIRS // diff --git a/subworkflows/local/tumor_only_happhase.nf b/subworkflows/local/tumor_only_happhase.nf index e051541e..de797ebf 100644 --- a/subworkflows/local/tumor_only_happhase.nf +++ b/subworkflows/local/tumor_only_happhase.nf @@ -4,6 +4,8 @@ include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phas include { 
LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' +include { DEEPSOMATIC } from '../../subworkflows/local/deepsomatic.nf' + workflow TUMOR_ONLY_HAPPHASE { @@ -35,6 +37,14 @@ workflow TUMOR_ONLY_HAPPHASE { return [meta,bam,bai, intervals] } .set{tumor_bams_deepvar} + + tumor_bams + .map { meta, tumor_bam, tumor_bai, _clairSTO_model -> + def normal_bam = [] + def normal_bai = [] + return [meta,normal_bam,normal_bai,tumor_bam,tumor_bai] + } + .set{tumor_bams_deepsomatic} DEEPVARIANT ( tumor_bams_deepvar, @@ -44,6 +54,14 @@ workflow TUMOR_ONLY_HAPPHASE { [[:],[]] ) + DEEPSOMATIC ( + tumor_bams_deepsomatic, + [[:],[]], + fasta, + fai, + [[:],[]] + ) + // // MODULE: CLAIRSTO // From ba8cf59c5db4be101af462a4de0dcd113160e475 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Fri, 13 Mar 2026 15:21:59 +0100 Subject: [PATCH 05/36] add gpu process labels for deepvariant, clair3, fibertools --- conf/base.config | 4 ++++ modules/local/clair3/main.nf | 1 + modules/local/deepsomatic/callvariants/main.nf | 1 + modules/local/fibertoolsrs/fire/main.nf | 1 + modules/local/fibertoolsrs/nucleosomes/main.nf | 1 + modules/local/fibertoolsrs/predictm6a/main.nf | 1 + modules/nf-core/deepvariant/callvariants/main.nf | 1 + 7 files changed, 10 insertions(+) diff --git a/conf/base.config b/conf/base.config index 84625158..79e7c3ee 100644 --- a/conf/base.config +++ b/conf/base.config @@ -26,6 +26,10 @@ process { // adding in your local modules too. // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel: process_gpu { + ext.use_gpu = { workflow.profile.contains('gpu') } + accelerator = { workflow.profile.contains('gpu') ? 
1 : null } + } withLabel:process_single { cpus = { 1 } memory = { 6.GB * task.attempt } diff --git a/modules/local/clair3/main.nf b/modules/local/clair3/main.nf index 6619ef14..33ef8609 100644 --- a/modules/local/clair3/main.nf +++ b/modules/local/clair3/main.nf @@ -1,6 +1,7 @@ process CLAIR3 { tag "$meta.id" label 'process_very_high' + label 'process_gpu' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/deepsomatic/callvariants/main.nf b/modules/local/deepsomatic/callvariants/main.nf index 8491e664..f796de51 100644 --- a/modules/local/deepsomatic/callvariants/main.nf +++ b/modules/local/deepsomatic/callvariants/main.nf @@ -1,6 +1,7 @@ process DEEPSOMATIC_CALLVARIANTS { tag "$meta.id" label 'process_high' + label 'process_gpu' //Conda is not supported at the moment container "docker.io/google/deepsomatic:1.7.0" diff --git a/modules/local/fibertoolsrs/fire/main.nf b/modules/local/fibertoolsrs/fire/main.nf index 1240f0d5..2d84e7e0 100644 --- a/modules/local/fibertoolsrs/fire/main.nf +++ b/modules/local/fibertoolsrs/fire/main.nf @@ -2,6 +2,7 @@ process FIBERTOOLSRS_FIRE { tag "$meta.id" label 'process_very_high' label 'process_high_memory' + label 'process_gpu' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/fibertoolsrs/nucleosomes/main.nf b/modules/local/fibertoolsrs/nucleosomes/main.nf index 33a6c5ea..7462cd68 100644 --- a/modules/local/fibertoolsrs/nucleosomes/main.nf +++ b/modules/local/fibertoolsrs/nucleosomes/main.nf @@ -2,6 +2,7 @@ process FIBERTOOLSRS_NUCLEOSOMES { tag "$meta.id" label 'process_very_high' label 'process_high_memory' + label 'process_gpu' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/local/fibertoolsrs/predictm6a/main.nf b/modules/local/fibertoolsrs/predictm6a/main.nf index 5aa174b0..0dabcd20 100644 --- a/modules/local/fibertoolsrs/predictm6a/main.nf +++ b/modules/local/fibertoolsrs/predictm6a/main.nf @@ -2,6 +2,7 @@ process FIBERTOOLSRS_PREDICTM6A { tag "$meta.id" label 'process_very_high' label 'process_high_memory' + label 'process_gpu' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/deepvariant/callvariants/main.nf b/modules/nf-core/deepvariant/callvariants/main.nf index 2fc656ee..251f4c73 100644 --- a/modules/nf-core/deepvariant/callvariants/main.nf +++ b/modules/nf-core/deepvariant/callvariants/main.nf @@ -2,6 +2,7 @@ process DEEPVARIANT_CALLVARIANTS { tag "$meta.id" label 'process_high' + label 'process_gpu' //Conda is not supported at the moment container "docker.io/google/deepvariant:1.9.0" From 80451343b02a6c630c7bd47c25f6dc9afb4fe39c Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Fri, 13 Mar 2026 15:48:38 +0100 Subject: [PATCH 06/36] install helpful bcftools modules for consensus vcfs --- modules.json | 20 +- .../nf-core/bcftools/annotate/environment.yml | 10 + modules/nf-core/bcftools/annotate/main.nf | 81 ++ modules/nf-core/bcftools/annotate/meta.yml | 112 +++ .../bcftools/annotate/tests/main.nf.test | 429 +++++++++ .../bcftools/annotate/tests/main.nf.test.snap | 440 +++++++++ .../bcftools/annotate/tests/nextflow.config | 4 + modules/nf-core/bcftools/isec/environment.yml | 9 + modules/nf-core/bcftools/isec/main.nf | 47 + modules/nf-core/bcftools/isec/meta.yml | 101 ++ .../nf-core/bcftools/isec/tests/main.nf.test | 318 +++++++ .../bcftools/isec/tests/main.nf.test.snap | 348 +++++++ .../bcftools/isec/tests/nextflow.config | 3 + modules/nf-core/bcftools/norm/environment.yml | 10 + modules/nf-core/bcftools/norm/main.nf | 71 ++ modules/nf-core/bcftools/norm/meta.yml | 107 +++ 
.../nf-core/bcftools/norm/tests/main.nf.test | 545 +++++++++++ .../bcftools/norm/tests/main.nf.test.snap | 876 ++++++++++++++++++ .../bcftools/norm/tests/nextflow.bcf.config | 4 + .../norm/tests/nextflow.bcf_gz.config | 4 + .../bcftools/norm/tests/nextflow.config | 4 + .../bcftools/norm/tests/nextflow.vcf.config | 4 + .../norm/tests/nextflow.vcf_gz.config | 4 + .../bcftools/norm/tests/vcf_gz_index.config | 4 + .../norm/tests/vcf_gz_index_csi.config | 4 + .../norm/tests/vcf_gz_index_tbi.config | 4 + subworkflows/local/small_variant_consensus.nf | 6 + 27 files changed, 3564 insertions(+), 5 deletions(-) create mode 100644 modules/nf-core/bcftools/annotate/environment.yml create mode 100644 modules/nf-core/bcftools/annotate/main.nf create mode 100644 modules/nf-core/bcftools/annotate/meta.yml create mode 100644 modules/nf-core/bcftools/annotate/tests/main.nf.test create mode 100644 modules/nf-core/bcftools/annotate/tests/main.nf.test.snap create mode 100644 modules/nf-core/bcftools/annotate/tests/nextflow.config create mode 100644 modules/nf-core/bcftools/isec/environment.yml create mode 100644 modules/nf-core/bcftools/isec/main.nf create mode 100644 modules/nf-core/bcftools/isec/meta.yml create mode 100644 modules/nf-core/bcftools/isec/tests/main.nf.test create mode 100644 modules/nf-core/bcftools/isec/tests/main.nf.test.snap create mode 100644 modules/nf-core/bcftools/isec/tests/nextflow.config create mode 100644 modules/nf-core/bcftools/norm/environment.yml create mode 100644 modules/nf-core/bcftools/norm/main.nf create mode 100644 modules/nf-core/bcftools/norm/meta.yml create mode 100644 modules/nf-core/bcftools/norm/tests/main.nf.test create mode 100644 modules/nf-core/bcftools/norm/tests/main.nf.test.snap create mode 100644 modules/nf-core/bcftools/norm/tests/nextflow.bcf.config create mode 100644 modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config create mode 100644 modules/nf-core/bcftools/norm/tests/nextflow.config create mode 100644 
modules/nf-core/bcftools/norm/tests/nextflow.vcf.config create mode 100644 modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config create mode 100644 modules/nf-core/bcftools/norm/tests/vcf_gz_index.config create mode 100644 modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config create mode 100644 modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config create mode 100644 subworkflows/local/small_variant_consensus.nf diff --git a/modules.json b/modules.json index 2d23478c..8c9ad45c 100644 --- a/modules.json +++ b/modules.json @@ -11,27 +11,37 @@ "installed_by": ["modules"], "patch": "modules/nf-core/ascat/ascat.diff" }, + "bcftools/annotate": { + "branch": "master", + "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", + "installed_by": ["modules"] + }, "bcftools/concat": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", "installed_by": ["modules", "vcf_gather_bcftools"] }, + "bcftools/isec": { + "branch": "master", + "git_sha": "3b2c3559699a7bca6a7c2b220695a072e030e17d", + "installed_by": ["modules"] + }, "bcftools/merge": { "branch": "master", "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", "installed_by": ["modules"] }, + "bcftools/norm": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["modules"] + }, "bcftools/sort": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", "installed_by": ["modules", "vcf_gather_bcftools"], "patch": "modules/nf-core/bcftools/sort/bcftools-sort.diff" }, - "deepsomatic": { - "branch": "master", - "git_sha": "ddb0d667cf6cdee3bab9497241de4bbf6b88d8cc", - "installed_by": ["modules"] - }, "deepvariant/callvariants": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", diff --git a/modules/nf-core/bcftools/annotate/environment.yml b/modules/nf-core/bcftools/annotate/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ 
b/modules/nf-core/bcftools/annotate/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf new file mode 100644 index 00000000..18778cc2 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/main.nf @@ -0,0 +1,81 @@ +process BCFTOOLS_ANNOTATE { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(input), path(index), path(annotations), path(annotations_index), path(columns), path(header_lines), path(rename_chrs) + + output: + tuple val(meta), path("${prefix}.${extension}"), emit: vcf + tuple val(meta), path("${prefix}.${extension}.tbi"), emit: tbi, optional: true + tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def annotations_file = annotations ? "--annotations ${annotations}" : '' + def columns_file = columns ? "--columns-file ${columns}" : '' + def header_file = header_lines ? 
"--header-lines ${header_lines}" : '' + def rename_chrs_file = rename_chrs ? "--rename-chrs ${rename_chrs}" : '' + extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : "vcf" + def index_command = !index ? "bcftools index ${input}" : '' + + if ("${input}" == "${prefix}.${extension}") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + """ + ${index_command} + + bcftools \\ + annotate \\ + ${args} \\ + ${annotations_file} \\ + ${columns_file} \\ + ${header_file} \\ + ${rename_chrs_file} \\ + --output ${prefix}.${extension} \\ + --threads ${task.cpus} \\ + ${input} + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : "vcf" + def index_extension = args.contains("--write-index=tbi") || args.contains("-W=tbi") + ? "tbi" + : args.contains("--write-index=csi") || args.contains("-W=csi") + ? "csi" + : args.contains("--write-index") || args.contains("-W") ? "csi" : "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index_extension.matches("csi|tbi") ? 
"touch ${prefix}.${extension}.${index_extension}" : "" + + if ("${input}" == "${prefix}.${extension}") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml new file mode 100644 index 00000000..86331661 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/meta.yml @@ -0,0 +1,112 @@ +name: bcftools_annotate +description: Add or remove annotations. +keywords: + - bcftools + - annotate + - vcf + - remove + - add +tools: + - annotate: + description: Add or remove annotations. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#annotate + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed + ontologies: [] + - index: + type: file + description: Index of the query VCF or BCF file + ontologies: [] + - annotations: + type: file + description: Bgzip-compressed file with annotations + ontologies: [] + - annotations_index: + type: file + description: Index of the annotations file + ontologies: [] + - columns: + type: file + description: List of columns in the annotations file, one name per row + ontologies: [] + - header_lines: + type: file + description: Contains lines to append to the output VCF header + ontologies: [] + - rename_chrs: + type: file + description: Rename annotations according to this file containing "old_name new_name\n" + pairs separated by whitespaces, each on a separate line. 
+ ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}": + type: file + description: Compressed annotated VCF file + pattern: "*{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test b/modules/nf-core/bcftools/annotate/tests/main.nf.test new file mode 100644 index 00000000..3e1d2573 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test @@ -0,0 +1,429 @@ +nextflow_process { + + name "Test Process BCFTOOLS_ANNOTATE" + script "../main.nf" + config "./nextflow.config" + process "BCFTOOLS_ANNOTATE" + + tag "modules" + tag "modules_nfcore" + tag 
"bcftools" + tag "bcftools/annotate" + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type z" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], [], [], [] - vcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type z" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? 
it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index") { + + when { + params { + args_modules = "--output-type z --write-index --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi") { + + when { + params { + args_modules = "--output-type z --write-index=csi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi") { + + when { + params { + args_modules = "--output-type z --write-index=tbi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, [] - bcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type u" + } + process { + """ + header = channel.of( + '##INFO=', + '##INFO=' + ).collectFile(name:"headers.vcf", newLine:true) + input[0] = channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [] + ]) + .combine(header) + .combine(channel.of([[]])) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], columns, [], [] - bcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type u" + } + process { + """ + columns = channel.of('INFO/ICB', 'INFO/HOB', 'INFO/DP4').collectFile(name:"columns.txt", newLine:true) + input[0] = channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ]).combine(columns) + .combine(channel.of([[], []])) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, rename_chrs - vcf_gz_index") { + + when { + params { + args_modules = "--output-type z --write-index --no-version" + } + process { + """ + headers = channel.of( + '##INFO=', + '##INFO=' + ).collectFile(name:"headers.vcf", newLine:true) + rename = channel.of('MT192765.1 renamed').collectFile(name:"rename.txt", newLine:true) + input[0] = channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [] + ]).combine(headers) + .combine(rename) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - stub") { + + options "-stub" + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type z" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index - stub") { + + options "-stub" + + when { + params { + args_modules = "--output-type z --write-index --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi - stub") { + + options "-stub" + + when { + params { + args_modules = "--output-type z --write-index=csi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi - stub") { + + options "-stub" + + when { + params { + args_modules = "--output-type z --write-index=tbi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } +} diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap new file mode 100644 index 00000000..10af196a --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap @@ -0,0 +1,440 @@ +{ + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:19.618749659" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,bc7bf3ee9e8430e064c539eb81e59bf9" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.tbi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:43.350060834" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,bc7bf3ee9e8430e064c539eb81e59bf9" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:36.101003418" + }, + "sarscov2 - [vcf, 
tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.tbi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:34.19449127" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:26.927815399" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,bc7bf3ee9e8430e064c539eb81e59bf9" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:28.891823681" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:12.400301681" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, 
[] - bcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.bcf" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:50.375384421" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], [], [], [] - vcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,250b64289ab9d48f76359d01699fdf7d" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:21.320211288" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,250b64289ab9d48f76359d01699fdf7d" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:13.274072987" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, rename_chrs - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:05.094685409" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], columns, [], [] - bcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.bcf" + ] + ], + [ + + ], + [ + + ], + { +
"versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:57.906382655" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/annotate/tests/nextflow.config b/modules/nf-core/bcftools/annotate/tests/nextflow.config new file mode 100644 index 00000000..10235100 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = "${params.args_modules}" + ext.prefix = { "${meta.id}_ann" } +} diff --git a/modules/nf-core/bcftools/isec/environment.yml b/modules/nf-core/bcftools/isec/environment.yml new file mode 100644 index 00000000..cb55500b --- /dev/null +++ b/modules/nf-core/bcftools/isec/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 + - bioconda::htslib=1.22.1 diff --git a/modules/nf-core/bcftools/isec/main.nf b/modules/nf-core/bcftools/isec/main.nf new file mode 100644 index 00000000..f39c7101 --- /dev/null +++ b/modules/nf-core/bcftools/isec/main.nf @@ -0,0 +1,47 @@ +process BCFTOOLS_ISEC { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcfs), path(tbis), path(file_list), path(targets_file), path(regions_file) + + output: + tuple val(meta), path("${prefix}", type: "dir"), emit: results + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + targets_file_args = targets_file ? "-T ${targets_file}" : '' + regions_file_args = regions_file ? "-R ${regions_file}" : '' + vcf_files = file_list ? "-l ${file_list}" : "${vcfs}" + + """ + bcftools isec \\ + ${args} \\ + ${targets_file_args} \\ + ${regions_file_args} \\ + -p ${prefix} \\ + ${vcf_files} \\ + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + touch ${prefix}/README.txt + touch ${prefix}/sites.txt + echo "" | gzip > ${prefix}/0000.vcf.gz + touch ${prefix}/0000.vcf.gz.tbi + echo "" | gzip > ${prefix}/0001.vcf.gz + touch ${prefix}/0001.vcf.gz.tbi + """ +} diff --git a/modules/nf-core/bcftools/isec/meta.yml b/modules/nf-core/bcftools/isec/meta.yml new file mode 100644 index 00000000..051e141e --- /dev/null +++ b/modules/nf-core/bcftools/isec/meta.yml @@ -0,0 +1,101 @@ +name: bcftools_isec +description: Apply set operations to VCF files +keywords: + - variant calling + - intersect + - union + - complement + - VCF + - BCF +tools: + - isec: + description: | + Computes intersections, unions and complements of VCF files. 
+ homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: list + description: | + List containing 2 or more vcf/bcf files. These must be compressed and have an associated index. + e.g. [ 'file1.vcf.gz', 'file2.vcf.gz' ] + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - edam: "http://edamontology.org/format_3570" # BCF + - tbis: + type: list + description: | + List containing the tbi index files corresponding to the vcf/bcf input files + pattern: "*.tbi" + ontologies: + - edam: "http://edamontology.org/format_3475" # Tabix index + - file_list: + type: file + description: | + Optional text file containing the list of VCF/BCF files to be processed by bcftools isec, one per line. + ontologies: + - edam: "http://edamontology.org/format_2330" # Text file + - targets_file: + type: file + description: | + Optional file containing target regions to restrict the analysis to. + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - edam: "http://edamontology.org/format_3475" # Tab-separated + - regions_file: + type: file + description: | + Optional file containing regions to restrict the analysis to. + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - edam: "http://edamontology.org/format_3475" # Tab-separated +output: + results: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - ${prefix}: + type: directory + description: Directory containing the output files from bcftools isec + pattern: "${prefix}/" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - edam: "http://edamontology.org/format_3570" # BCF + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - bcftools --version | sed '1!d; s/^.*bcftools //': + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - bcftools --version | sed '1!d; s/^.*bcftools //': + type: string + description: The command used to generate the version of the tool +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/isec/tests/main.nf.test b/modules/nf-core/bcftools/isec/tests/main.nf.test new file mode 100644 index 00000000..d0a1f751 --- /dev/null +++ b/modules/nf-core/bcftools/isec/tests/main.nf.test @@ -0,0 +1,318 @@ +nextflow_process { + + name "Test Process BCFTOOLS_ISEC" + script "../main.nf" + process "BCFTOOLS_ISEC" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/isec" + + config "./nextflow.config" + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]]") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" + } + ).match() + } + ) + } + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - targets") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + 
[], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)], + [] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" && it.getFileName().toString() != "sites.txt" + } + ).match() + } + ) + } + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - targets - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - regions") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" && it.getFileName().toString() != "sites.txt" + } + ).match() + } + ) + } + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - regions - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list") { + + setup{ + new File("${launchDir}/file_list.txt").text = """ + test.vcf.gz + test2.vcf.gz + """.stripIndent().trim() + } + + + when { + process { + """ + input[0] = 
[ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [file("${launchDir}/file_list.txt", checkIfExists: true)], + [], + [] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" + } + ).match() + } + ) + } + } + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list - stub") { + options "-stub" + + setup{ + new File("${launchDir}/file_list.txt").text = """ + test.vcf.gz + test2.vcf.gz + """.stripIndent().trim() + } + + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [file("${launchDir}/file_list.txt", checkIfExists: true)], + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot(process.out).match() } + + ) + } + } +} diff --git a/modules/nf-core/bcftools/isec/tests/main.nf.test.snap b/modules/nf-core/bcftools/isec/tests/main.nf.test.snap new file mode 100644 index 00000000..8a2cb65f --- /dev/null +++ b/modules/nf-core/bcftools/isec/tests/main.nf.test.snap @@ -0,0 +1,348 @@ +{ + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]]": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + "0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,8e722884ffb75155212a3fc053918766", + "0001.vcf.gz:md5,b39a72f91458b94b346dd73690207649", + "README.txt:md5,10fc33b66522645600d44afbd41fb792", + "sites.txt:md5,1cea3fbde7f6d3c97f3d39036f9690df" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:48.711543241" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - targets": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + "0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,565cbbb0d930be20fc235604da695623", + "0001.vcf.gz:md5,d65e9e45a4c5f45873cb26b80c81b213", + "README.txt:md5,f4190b7943f8f12886ad57ecaedd0c43" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-28T11:48:46.533255686" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - regions - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + 
[ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-23T19:06:04.239620535" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - targets - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-23T18:58:08.73508502" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + 
"0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,8e722884ffb75155212a3fc053918766", + "0001.vcf.gz:md5,b39a72f91458b94b346dd73690207649", + "README.txt:md5,4426b6b26b177d85e150f06bd5138411", + "sites.txt:md5,1cea3fbde7f6d3c97f3d39036f9690df" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-27T12:52:50.066330847" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:56.874977547" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - regions": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + "0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,565cbbb0d930be20fc235604da695623", + "0001.vcf.gz:md5,d65e9e45a4c5f45873cb26b80c81b213", + "README.txt:md5,16eeab1b2463bab4d498a4dfdaa297fa" + ] + ], + "meta": { + "nf-test": "0.9.3", + 
"nextflow": "25.04.8" + }, + "timestamp": "2026-01-28T11:49:26.428693544" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-28T11:55:27.123701797" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/isec/tests/nextflow.config b/modules/nf-core/bcftools/isec/tests/nextflow.config new file mode 100644 index 00000000..ac887d6b --- /dev/null +++ b/modules/nf-core/bcftools/isec/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--nfiles +2 --output-type z --no-version' +} diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/norm/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: 
datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf new file mode 100644 index 00000000..443c8bbb --- /dev/null +++ b/modules/nf-core/bcftools/norm/main.nf @@ -0,0 +1,71 @@ +process BCFTOOLS_NORM { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi"), emit: tbi, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? 
"vcf" + : "vcf.gz" + """ + bcftools norm \\ + --fasta-ref ${fasta} \\ + --output ${prefix}.${extension} \\ + ${args} \\ + --threads ${task.cpus} \\ + ${vcf} + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf.gz" + def index = '' + if (extension in ['vcf.gz', 'bcf', 'bcf.gz']) { + if (['--write-index=tbi', '-W=tbi'].any { arg -> args.contains(arg) } && extension == 'vcf.gz') { + index = 'tbi' + } + else if (['--write-index=tbi', '-W=tbi', '--write-index=csi', '-W=csi', '--write-index', '-W'].any { arg -> args.contains(arg) }) { + index = 'csi' + } + } + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = index ? "touch ${prefix}.${extension}.${index}" : "" + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml new file mode 100644 index 00000000..9feecac0 --- /dev/null +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -0,0 +1,107 @@ +name: bcftools_norm +description: Normalize VCF file +keywords: + - normalize + - norm + - variant calling + - VCF +tools: + - norm: + description: | + Normalize VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be normalized + e.g. 
'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed + BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@abhi18av" + - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test b/modules/nf-core/bcftools/norm/tests/main.nf.test new file mode 100644 index 00000000..05851753 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/main.nf.test @@ -0,0 +1,545 @@ +nextflow_process { + + name "Test Process BCFTOOLS_NORM" + script "../main.nf" + process "BCFTOOLS_NORM" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/norm" + + test("sarscov2 - [ vcf, [] ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta 
map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ 
+ } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output") { + + config "./nextflow.bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output") { + + config "./nextflow.bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: 
true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta -stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output -stub") { + + config "./nextflow.vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output - stub") { + + config "./nextflow.bcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub") { + + config "./nextflow.bcf_gz.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + +} diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test.snap b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap new file mode 100644 index 00000000..ee2dadf7 --- /dev/null +++ 
b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap @@ -0,0 +1,876 @@ +{ + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:29.987030961" + }, + "sarscov2 - [ vcf, [] ], fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:06.488086505" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:34.863776359" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub": { + "content": [ + { + "0": [ 
+ [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:54.718705045" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:43.007377633" + }, + "sarscov2 - [ vcf, [] ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:07:54.877084219" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + 
], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:22.220435939" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,bf88706ef69c44ca9e287bc953ba3593" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,bf88706ef69c44ca9e287bc953ba3593" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:58.483532889" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:10:03.22576704" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + 
"timestamp": "2024-06-05T13:53:28.356741947" + }, + "sarscov2 - [ vcf, tbi ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:27.281315407" + }, + "sarscov2 - [ vcf, tbi ], fasta -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:14.249715835" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:46.665932019" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test" + }, + 
"test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T07:52:58.381931979" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:38.144449162" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:10:10.602984345" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": 
"2024-06-05T13:53:09.808834237" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:51.053195842" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config new file mode 100644 index 00000000..b79af868 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type b --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config new file mode 100644 index 00000000..f36f397c --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type u --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.config b/modules/nf-core/bcftools/norm/tests/nextflow.config new file mode 100644 index 00000000..510803b4 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config new file mode 100644 index 00000000..10bf93e3 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config @@ -0,0 +1,4 
@@ +process { + ext.args = '-m -any --output-type v --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config new file mode 100644 index 00000000..b31dd2de --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type z ---no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config new file mode 100644 index 00000000..7dd696ee --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..aebffb6f --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..b192ae7d --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf new file mode 100644 index 00000000..5afdd3ca --- /dev/null +++ b/subworkflows/local/small_variant_consensus.nf @@ -0,0 +1,6 @@ + +workflow SMALL_VARIANT_CONSENSUS.nf{ + take: + ch_vcf + main: +} \ No newline at end of file From a5ca08a4e30bb6a0f4a93cc8d4b9d8ed246eeed2 Mon Sep 
17 00:00:00 2001 From: robert-a-forsyth Date: Fri, 13 Mar 2026 17:01:07 +0100 Subject: [PATCH 07/36] bcftools query --- modules.json | 5 + .../nf-core/bcftools/query/environment.yml | 10 ++ modules/nf-core/bcftools/query/main.nf | 46 +++++++++ modules/nf-core/bcftools/query/meta.yml | 89 +++++++++++++++++ .../nf-core/bcftools/query/tests/main.nf.test | 97 +++++++++++++++++++ .../bcftools/query/tests/main.nf.test.snap | 73 ++++++++++++++ .../bcftools/query/tests/nextflow.config | 3 + 7 files changed, 323 insertions(+) create mode 100644 modules/nf-core/bcftools/query/environment.yml create mode 100644 modules/nf-core/bcftools/query/main.nf create mode 100644 modules/nf-core/bcftools/query/meta.yml create mode 100644 modules/nf-core/bcftools/query/tests/main.nf.test create mode 100644 modules/nf-core/bcftools/query/tests/main.nf.test.snap create mode 100644 modules/nf-core/bcftools/query/tests/nextflow.config diff --git a/modules.json b/modules.json index 8c9ad45c..f188df47 100644 --- a/modules.json +++ b/modules.json @@ -36,6 +36,11 @@ "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", "installed_by": ["modules"] }, + "bcftools/query": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["modules"] + }, "bcftools/sort": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", diff --git a/modules/nf-core/bcftools/query/environment.yml b/modules/nf-core/bcftools/query/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/query/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git 
a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf new file mode 100644 index 00000000..726360f0 --- /dev/null +++ b/modules/nf-core/bcftools/query/main.nf @@ -0,0 +1,46 @@ +process BCFTOOLS_QUERY { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(tbi) + path regions + path targets + path samples + + output: + tuple val(meta), path("*.${suffix}"), emit: output + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + """ + bcftools query \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + ${args} \\ + ${vcf} \\ + > ${prefix}.${suffix} + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ + touch ${prefix}.${suffix} \\ + """ +} diff --git a/modules/nf-core/bcftools/query/meta.yml b/modules/nf-core/bcftools/query/meta.yml new file mode 100644 index 00000000..6bcb5e57 --- /dev/null +++ b/modules/nf-core/bcftools/query/meta.yml @@ -0,0 +1,89 @@ +name: bcftools_query +description: Extracts fields from VCF or BCF files and outputs them in user-defined + format. 
+keywords: + - query + - variant calling + - bcftools + - VCF +tools: + - query: + description: | + Extracts fields from VCF or BCF files and outputs them in user-defined format. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be queried. + pattern: "*.{vcf.gz, vcf}" + ontologies: [] + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. + pattern: "*.tbi" + ontologies: [] + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. + ontologies: [] + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + ontologies: [] + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + ontologies: [] +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.${suffix}": + type: file + description: BCFTools query output file + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@abhi18av" + - "@drpatelh" +maintainers: + - "@abhi18av" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test b/modules/nf-core/bcftools/query/tests/main.nf.test new file mode 100644 index 00000000..63ac5af8 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test @@ -0,0 +1,97 @@ +nextflow_process { + + name "Test Process BCFTOOLS_QUERY" + script "../main.nf" + process "BCFTOOLS_QUERY" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/query" + + config "./nextflow.config" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + when { + process { + """ + input[0] 
= [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.output[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } +} diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test.snap b/modules/nf-core/bcftools/query/tests/main.nf.test.snap new file mode 100644 index 00000000..5168ef3f --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test.snap @@ -0,0 +1,73 @@ +{ + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,75a6bd0084e2e1838cf7baba11b99d19" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:16:54.523612853" + }, + "sarscov2 - [vcf, 
tbi], [], [], [] - stub": { + "content": [ + "out.txt", + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:17:00.64798632" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,87a2ab194e1ee3219b44e58429ec3307" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:16:47.953130141" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/nextflow.config b/modules/nf-core/bcftools/query/tests/nextflow.config new file mode 100644 index 00000000..8547ec10 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "-f '%CHROM %POS %REF %ALT[%SAMPLE=%GT]'" +} From 20437ffaddc10daccd1b3a41e52697a7bcfdcdec Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Fri, 13 Mar 2026 17:11:06 +0100 Subject: [PATCH 08/36] preliminary small variant consensus calling stuff --- conf/modules.config | 33 +++++++++++ modules/nf-core/bcftools/isec/main.nf | 2 + subworkflows/local/small_variant_consensus.nf | 58 ++++++++++++++++++- 3 files changed, 92 insertions(+), 1 deletion(-) diff --git a/conf/modules.config b/conf/modules.config index 697c1c2c..dae36220 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -87,6 +87,39 @@ process { ] } + withName: '.*:BCFTOOLS_NORM' { + publishDir = [ + enabled: false + ] + } + + withName: '.*:BCFTOOLS_ISEC' { + ext.args{ + "-n=2 -c all -w1 -Oz" + } + publishDir = [ + enabled: false + ] + } + withName: '.*:BCFTOOLS_ANNOTATE' { + ext.args{ + "-h <(echo '##INFO=') \\ + -c CHROM,POS,REF,ALT,INFO/CALLER \\ + -Oz" + } + publishDir = [ + enabled: false + ] + } + withName: '.*:BCFTOOLS_QUERY' { + ext.args{ + "-f 
'%CHROM\t%POS\t%REF\t%ALT\t${meta.variant_caller}\n'" + } + publishDir = [ + enabled: false + ] + } + withName: '.*DEEPVARIANT_MAKEEXAMPLES' { ext.args = { meta.platform == 'pb' diff --git a/modules/nf-core/bcftools/isec/main.nf b/modules/nf-core/bcftools/isec/main.nf index f39c7101..afa71069 100644 --- a/modules/nf-core/bcftools/isec/main.nf +++ b/modules/nf-core/bcftools/isec/main.nf @@ -12,6 +12,7 @@ process BCFTOOLS_ISEC { output: tuple val(meta), path("${prefix}", type: "dir"), emit: results + tuple val(meta), path("${prefix}_consensus.vcf.gz"), emit: consensus tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools when: @@ -27,6 +28,7 @@ process BCFTOOLS_ISEC { """ bcftools isec \\ ${args} \\ + -o ${$prefix}_consensus.vcf.gz ${targets_file_args} \\ ${regions_file_args} \\ -p ${prefix} \\ diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf index 5afdd3ca..6bb840c6 100644 --- a/subworkflows/local/small_variant_consensus.nf +++ b/subworkflows/local/small_variant_consensus.nf @@ -1,6 +1,62 @@ +include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_ISEC } from '../../modules/nf-core/bcftools/isec/main' +include { BCFTOOLS_MERGE } from '../../modules/nf-core/bcftools/merge/main' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/merge/main' + + workflow SMALL_VARIANT_CONSENSUS.nf{ take: - ch_vcf + ch_vcf // [meta, vcfs, tbis] + fasta + fai + var_keep_method + main: + + if (var_keep_method == 'consensus') { + BCFTOOLS_NORM(ch_vcf, fasta) + BCFTOOLS_NORM.out.vcf + .join(BCFTOOLS_NORM.tbi) + .map{ meta, vcf, tbi -> + def file = [] + def target = [] + def regions = [] + return [meta, vcf, tbi,file,target,regions] + } + .set{isec_input} + BCFTOOLS_ISEC(isec_input) + } + else if (var_keep_method == 'all'){ + BCFTOOLS_NORM(ch_vcf,fasta) + BCFTOOLS_NORM.out.vcf + 
.join(BCFTOOLS_NORM.tbi) + .set { vcf_tbi} + BCFTOOLS_QUERY(vcf_tbi, [], [], []) + vcf_tbi + .join(BCFTOOLS_QUERY.out.output) + .map{ meta, vcf, tbi, annotations -> + def annotations_index = [] + def columns = [] + def header_lines = [] + def rename_chrs = [] + return [ meta, vcf, tbi,file,annotations,annotations_index, columns, header_lines, rename_chrs ] + } + .set{annotate_input} + + BCFTOOLS_ANNOTATE(ch_vcf) + + BCFTOOLS_ANNOTATE.out.vcf + .join(BCFTOOLS_ANNOTATE.out.tbi) + .map{ meta, vcf, tbi -> + def bed = [] + return [ bed ] + } + .set{ merge input} + fasta + .join(fai) + .set{ fasta_fai } + BCFTOOLS_MERGE(merge_input, fasta_fai) + } + } \ No newline at end of file From 0ac7404cbc69bf4b5ed069413030868de4965b60 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Mon, 16 Mar 2026 10:47:35 +0100 Subject: [PATCH 09/36] fix channel structure --- conf/modules.config | 2 +- subworkflows/local/small_variant_consensus.nf | 64 +++++++++++++------ subworkflows/local/tumor_normal_happhase.nf | 27 ++++++++ 3 files changed, 72 insertions(+), 21 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index dae36220..d742bb3d 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -113,7 +113,7 @@ process { } withName: '.*:BCFTOOLS_QUERY' { ext.args{ - "-f '%CHROM\t%POS\t%REF\t%ALT\t${meta.variant_caller}\n'" + "-f '%CHROM\t%POS\t%REF\t%ALT\t${meta.caller}\n'" } publishDir = [ enabled: false diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf index 6bb840c6..8be88766 100644 --- a/subworkflows/local/small_variant_consensus.nf +++ b/subworkflows/local/small_variant_consensus.nf @@ -7,44 +7,68 @@ include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/merge/mai workflow SMALL_VARIANT_CONSENSUS.nf{ take: - ch_vcf // [meta, vcfs, tbis] + mixed_vcfs // [meta: w caller_info,mixed_vcfs, mixed_indicies] fasta fai var_keep_method main: + //normalize VCFs + BCFTOOLS_NORM(mixed_vcfs, fasta) + 
BCFTOOLS_NORM.out.vcf + .join(BCFTOOLS_NORM.tbi) + .set {normalized_vcfs} + + // create annotation file with caller name + BCFTOOLS_QUERY(normalized_vcfs, [], [], []) + + normalized_vcfs + .join(BCFTOOLS_QUERY.out.output) + .map{ meta, vcf, tbi, annotations -> + def annotations_index = [] + def columns = [] + def header_lines = [] + def rename_chrs = [] + return [ meta, vcf, tbi,file,annotations,annotations_index, columns, header_lines, rename_chrs ] + } + .set{annotate_input} + + // Annotate vcfs with caller id + BCFTOOLS_ANNOTATE(annotate_input) + + BCFTOOLS_ANNOTATE.out.vcf + .join(BCFTOOLS_ANNOTATE.tbi) + .set{annotated_vcfs} + + annotated_vcfs + .branch { meta, _vcfs, _tbi -> + deepvariant: meta.caller in [ 'deepvariant', 'deepsomatic' ] + clair: meta.caller in ['clair3','clairs-to','clairs'] + } + .set{annotated_vcfs_branched} + + clair_ch = annotated_vcfs_branched.clair + deepvariant = annotated_vcfs_branched.deepvariant + + clair_ch. + map {meta, vcfs, tbi -> + meta + } if (var_keep_method == 'consensus') { - BCFTOOLS_NORM(ch_vcf, fasta) + BCFTOOLS_NORM.out.vcf .join(BCFTOOLS_NORM.tbi) .map{ meta, vcf, tbi -> def file = [] def target = [] def regions = [] - return [meta, vcf, tbi,file,target,regions] + return [meta, vcf, tbi, file, target, regions] } .set{isec_input} BCFTOOLS_ISEC(isec_input) } else if (var_keep_method == 'all'){ - BCFTOOLS_NORM(ch_vcf,fasta) - BCFTOOLS_NORM.out.vcf - .join(BCFTOOLS_NORM.tbi) - .set { vcf_tbi} - BCFTOOLS_QUERY(vcf_tbi, [], [], []) - vcf_tbi - .join(BCFTOOLS_QUERY.out.output) - .map{ meta, vcf, tbi, annotations -> - def annotations_index = [] - def columns = [] - def header_lines = [] - def rename_chrs = [] - return [ meta, vcf, tbi,file,annotations,annotations_index, columns, header_lines, rename_chrs ] - } - .set{annotate_input} - - BCFTOOLS_ANNOTATE(ch_vcf) BCFTOOLS_ANNOTATE.out.vcf .join(BCFTOOLS_ANNOTATE.out.tbi) diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf index 
9a0698ee..bedc7542 100644 --- a/subworkflows/local/tumor_normal_happhase.nf +++ b/subworkflows/local/tumor_normal_happhase.nf @@ -7,6 +7,7 @@ include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/conca include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort' include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' include { DEEPSOMATIC } from '../../subworkflows/local/deepsomatic.nf' +include { SMALL_VARIANT_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' workflow TUMOR_NORMAL_HAPPHASE { @@ -124,6 +125,32 @@ workflow TUMOR_NORMAL_HAPPHASE { [[:],[]] ) + DEEPVARIANT.out.vcf + .join(DEEPVARIANT.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepvariant'] + return [new_meta, vcf, tbi] + } + .set{deepvariant_ch} + + CLAIR3.out.vcf + .join(CLAIR3.out.tbi) + .map { meta, vcf , tbi -> + def new_meta = meta + [caller:'clair'] + } + .set{clair3_ch} + // [meta,deepvar_vcf,deepvar_index,clair3_vcf,clair3_index] + clair3_ch + .mix(deepvariant_ch) + .set{mixed_vcfs} + SMALL_VARIANT_CONSENSUS( + mixed_vcfs, + fasta, + fai + params.germline_var_keep + ) + + // Add germline vcf to normal bams // remove clair3 model information From c3635d8e4e6846d57214a7cfffebeb235fc0550f Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Mon, 16 Mar 2026 14:37:12 +0100 Subject: [PATCH 10/36] preliminary germline merging channel structure --- conf/modules.config | 22 +++-- modules/nf-core/bcftools/isec/main.nf | 8 +- modules/nf-core/bcftools/query/main.nf | 5 +- nextflow.config | 3 + subworkflows/local/small_variant_consensus.nf | 94 ++++++++++++++----- subworkflows/local/tumor_normal_happhase.nf | 16 +++- subworkflows/local/tumor_only_happhase.nf | 29 +++++- 7 files changed, 136 insertions(+), 41 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f3fccc13..1f2ddd84 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -21,7 +21,6 @@ process { // // QC Processes - // 
withName: '.*:MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } @@ -107,31 +106,38 @@ process { } withName: '.*:BCFTOOLS_NORM' { + ext.prefix = { "${meta.id}.${meta.caller}_norm" } + ext.args = { + "-Oz -W=tbi" + } publishDir = [ enabled: false ] } withName: '.*:BCFTOOLS_ISEC' { - ext.args{ - "-n=2 -c all -w1 -Oz" + ext.prefix = { "${meta.id}_isec" } + ext.args ={ + "-n=2 -c all -Oz" } publishDir = [ enabled: false ] } withName: '.*:BCFTOOLS_ANNOTATE' { - ext.args{ - "-h <(echo '##INFO=') \\ - -c CHROM,POS,REF,ALT,INFO/CALLER \\ - -Oz" + ext.prefix = { "${meta.id}.${meta.caller}" } + ext.args = { + '''-h <(echo '##INFO=') \ + -c CHROM,POS,REF,ALT,INFO/CALLER \ + -Oz \ + -W=tbi''' } publishDir = [ enabled: false ] } withName: '.*:BCFTOOLS_QUERY' { - ext.args{ + ext.args = { "-f '%CHROM\t%POS\t%REF\t%ALT\t${meta.caller}\n'" } publishDir = [ diff --git a/modules/nf-core/bcftools/isec/main.nf b/modules/nf-core/bcftools/isec/main.nf index afa71069..1536c523 100644 --- a/modules/nf-core/bcftools/isec/main.nf +++ b/modules/nf-core/bcftools/isec/main.nf @@ -12,7 +12,10 @@ process BCFTOOLS_ISEC { output: tuple val(meta), path("${prefix}", type: "dir"), emit: results - tuple val(meta), path("${prefix}_consensus.vcf.gz"), emit: consensus + tuple val(meta), path("${prefix}/0000.vcf.gz"), emit: deepvar_style_consensus_vcf + tuple val(meta), path("${prefix}/0000.vcf.gz.tbi"), emit: deepvar_style_consensus_tbi + tuple val(meta), path("${prefix}/0001.vcf.gz"), emit: clair3_style_consensus_vcf + tuple val(meta), path("${prefix}/0001.vcf.gz.tbi"), emit: clair3_style_consensus_tbi tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools when: @@ -28,11 +31,10 @@ process BCFTOOLS_ISEC { """ bcftools isec \\ ${args} \\ - -o ${$prefix}_consensus.vcf.gz ${targets_file_args} \\ ${regions_file_args} \\ -p ${prefix} \\ - ${vcf_files} \\ + ${vcf_files} """ stub: 
diff --git a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf index 726360f0..996214f1 100644 --- a/modules/nf-core/bcftools/query/main.nf +++ b/modules/nf-core/bcftools/query/main.nf @@ -14,7 +14,8 @@ process BCFTOOLS_QUERY { path samples output: - tuple val(meta), path("*.${suffix}"), emit: output + tuple val(meta), path("*.${suffix}.gz"), emit: output + tuple val(meta), path("*.${suffix}.gz.tbi"), emit: index tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools when: @@ -35,6 +36,8 @@ process BCFTOOLS_QUERY { ${args} \\ ${vcf} \\ > ${prefix}.${suffix} + bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz + tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz """ stub: diff --git a/nextflow.config b/nextflow.config index c1315f99..68c9fd9b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,9 @@ params { // Input options input = null + germline_var_keep = 'all' + + // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf index 8be88766..6ad1d12c 100644 --- a/subworkflows/local/small_variant_consensus.nf +++ b/subworkflows/local/small_variant_consensus.nf @@ -1,11 +1,13 @@ -include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main' -include { BCFTOOLS_ISEC } from '../../modules/nf-core/bcftools/isec/main' -include { BCFTOOLS_MERGE } from '../../modules/nf-core/bcftools/merge/main' -include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/merge/main' +include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_ISEC } from '../../modules/nf-core/bcftools/isec/main' +include { BCFTOOLS_MERGE } from '../../modules/nf-core/bcftools/merge/main' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query/main' +include { BCFTOOLS_ANNOTATE } from 
'../../modules/nf-core/bcftools/annotate/main' -workflow SMALL_VARIANT_CONSENSUS.nf{ + +workflow SMALL_VARIANT_CONSENSUS { take: mixed_vcfs // [meta: w caller_info,mixed_vcfs, mixed_indicies] fasta @@ -17,7 +19,7 @@ workflow SMALL_VARIANT_CONSENSUS.nf{ BCFTOOLS_NORM(mixed_vcfs, fasta) BCFTOOLS_NORM.out.vcf - .join(BCFTOOLS_NORM.tbi) + .join(BCFTOOLS_NORM.out.tbi) .set {normalized_vcfs} // create annotation file with caller name @@ -25,12 +27,12 @@ workflow SMALL_VARIANT_CONSENSUS.nf{ normalized_vcfs .join(BCFTOOLS_QUERY.out.output) - .map{ meta, vcf, tbi, annotations -> - def annotations_index = [] + .join(BCFTOOLS_QUERY.out.index) + .map{ meta, vcf, tbi, annotations, annotations_index -> def columns = [] def header_lines = [] def rename_chrs = [] - return [ meta, vcf, tbi,file,annotations,annotations_index, columns, header_lines, rename_chrs ] + return [ meta, vcf, tbi, annotations, annotations_index, columns, header_lines, rename_chrs ] } .set{annotate_input} @@ -38,7 +40,7 @@ workflow SMALL_VARIANT_CONSENSUS.nf{ BCFTOOLS_ANNOTATE(annotate_input) BCFTOOLS_ANNOTATE.out.vcf - .join(BCFTOOLS_ANNOTATE.tbi) + .join(BCFTOOLS_ANNOTATE.out.tbi) .set{annotated_vcfs} annotated_vcfs @@ -49,38 +51,84 @@ workflow SMALL_VARIANT_CONSENSUS.nf{ .set{annotated_vcfs_branched} clair_ch = annotated_vcfs_branched.clair - deepvariant = annotated_vcfs_branched.deepvariant + deepvariant_ch = annotated_vcfs_branched.deepvariant clair_ch. 
map {meta, vcfs, tbi -> - meta + def new_meta = meta.subMap('id', + 'paired_data', + 'type', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, vcfs, tbi] } - if (var_keep_method == 'consensus') { + .set{clair_ch} - BCFTOOLS_NORM.out.vcf - .join(BCFTOOLS_NORM.tbi) - .map{ meta, vcf, tbi -> + deepvariant_ch + .map {meta, vcfs, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'type', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, vcfs, tbi] + } + .set{deepvariant_ch} + + deepvariant_ch + .join(clair_ch) + .map { meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> + def vcfs = [deepvar_vcf, clair_vcf] + def tbis = [deepvar_tbi, clair_tbi] + return [ meta, vcfs, tbis] + } + .set{mixed_vcfs} + + if (var_keep_method == 'consensus') { + mixed_vcfs + .map{ meta, vcfs, tbis -> def file = [] def target = [] def regions = [] - return [meta, vcf, tbi, file, target, regions] + return [meta, vcfs, tbis, file, target, regions] } .set{isec_input} BCFTOOLS_ISEC(isec_input) + BCFTOOLS_ISEC.out.deepvar_style_consensus_vcf + .set{vcf} + BCFTOOLS_ISEC.out.deepvar_style_consensus_tbi + .set{tbi} } + else if (var_keep_method == 'all'){ - - BCFTOOLS_ANNOTATE.out.vcf - .join(BCFTOOLS_ANNOTATE.out.tbi) - .map{ meta, vcf, tbi -> + mixed_vcfs + .map{ meta, vcfs, tbis -> def bed = [] - return [ bed ] + return [ meta, vcfs, tbis, bed ] } - .set{ merge input} + .set{ merge_input} fasta .join(fai) .set{ fasta_fai } BCFTOOLS_MERGE(merge_input, fasta_fai) + BCFTOOLS_MERGE.out.vcf + .set{vcf} + BCFTOOLS_MERGE.out.index + .set{tbi} } + emit: + vcf + tbi + } \ No newline at end of file diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf index a8aeffad..159dd906 100644 --- a/subworkflows/local/tumor_normal_happhase.nf +++ b/subworkflows/local/tumor_normal_happhase.nf @@ -128,26 +128,30 
@@ workflow TUMOR_NORMAL_HAPPHASE { CLAIR3.out.vcf .join(CLAIR3.out.tbi) .map { meta, vcf , tbi -> - def new_meta = meta + [caller:'clair'] + def new_meta = meta + [caller:'clair3'] + return [new_meta, vcf, tbi] } .set{clair3_ch} // [meta,deepvar_vcf,deepvar_index,clair3_vcf,clair3_index] + clair3_ch .mix(deepvariant_ch) .set{mixed_vcfs} + SMALL_VARIANT_CONSENSUS( mixed_vcfs, fasta, - fai + fai, params.germline_var_keep ) // Add germline vcf to normal bams // remove clair3 model information - + SMALL_VARIANT_CONSENSUS.out.vcf.view() + normal_bams.view() normal_bams - .join(CLAIR3.out.vcf) + .join(SMALL_VARIANT_CONSENSUS.out.vcf) .map { meta, bam, bai, _clair3_model, _platform, vcf -> def svs = [] def mods = [] @@ -156,7 +160,7 @@ workflow TUMOR_NORMAL_HAPPHASE { .set{ normal_bams_germlinevcf } // [meta, bam, bai, germline_vcf, [], []] -- svs and mods are empty placeholders for LONGPHASE_PHASE input - CLAIR3.out.vcf + SMALL_VARIANT_CONSENSUS.out.vcf .map { meta, vcf -> def extra = [] return [meta, vcf, extra] @@ -282,6 +286,8 @@ workflow TUMOR_NORMAL_HAPPHASE { } .set{ deepsomatic_input } + + DEEPSOMATIC ( deepsomatic_input, [[:],[]], diff --git a/subworkflows/local/tumor_only_happhase.nf b/subworkflows/local/tumor_only_happhase.nf index 7759d3d0..48b25fed 100644 --- a/subworkflows/local/tumor_only_happhase.nf +++ b/subworkflows/local/tumor_only_happhase.nf @@ -5,6 +5,7 @@ include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/hapl include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' include { DEEPSOMATIC } from '../../subworkflows/local/deepsomatic.nf' +include { SMALL_VARIANT_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' workflow TUMOR_ONLY_HAPPHASE { @@ -94,10 +95,36 @@ workflow TUMOR_ONLY_HAPPHASE { clairsto_vcf ) + VCFSPLIT.out.germline_vcf + .join(VCFSPLIT.out.germline_tbi) + .map { meta, vcf, tbi -> + def new_meta = 
meta + [caller:'clairs-to'] + return [ new_meta, vcf, tbi] + } + .set{clairsto_germline_ch} + + DEEPVARIANT.out.vcf + .join(DEEPVARIANT.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepvariant'] + return [new_meta, vcf, tbi] + } + .set{deepvariant_ch} + + clairsto_germline_ch + .mix(deepvariant_ch) + .set{mixed_vcfs} + + SMALL_VARIANT_CONSENSUS( + mixed_vcfs, + fasta, + fai, + params.germline_var_keep + ) // Add the nonsomatic vcf info // remove model info tumor_bams - .join(VCFSPLIT.out.germline_vcf) + .join(SMALL_VARIANT_CONSENSUS.out.vcf) .map{ meta, bam, bai, _model, snps -> def svs = [] def mods = [] From 713b11a977903d37055a48bc67f7dc2964e3613b Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Mon, 16 Mar 2026 16:08:38 +0100 Subject: [PATCH 11/36] fix bcftool merge input to stop value channel conversion --- modules/nf-core/bcftools/merge/main.nf | 3 ++- subworkflows/local/small_variant_consensus.nf | 12 +++++------- subworkflows/local/tumor_normal_happhase.nf | 17 +++++++++-------- 3 files changed, 16 insertions(+), 16 deletions(-) diff --git a/modules/nf-core/bcftools/merge/main.nf b/modules/nf-core/bcftools/merge/main.nf index f1acbd3d..f295c0e6 100644 --- a/modules/nf-core/bcftools/merge/main.nf +++ b/modules/nf-core/bcftools/merge/main.nf @@ -9,7 +9,8 @@ process BCFTOOLS_MERGE { input: tuple val(meta), path(vcfs), path(tbis), path(bed) - tuple val(meta2), path(fasta), path(fai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: vcf diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf index 6ad1d12c..92399dcd 100644 --- a/subworkflows/local/small_variant_consensus.nf +++ b/subworkflows/local/small_variant_consensus.nf @@ -42,7 +42,6 @@ workflow SMALL_VARIANT_CONSENSUS { BCFTOOLS_ANNOTATE.out.vcf .join(BCFTOOLS_ANNOTATE.out.tbi) .set{annotated_vcfs} - annotated_vcfs .branch { meta, _vcfs, _tbi -> 
deepvariant: meta.caller in [ 'deepvariant', 'deepsomatic' ] @@ -84,7 +83,8 @@ workflow SMALL_VARIANT_CONSENSUS { return [ new_meta, vcfs, tbi] } .set{deepvariant_ch} - + deepvariant_ch.view() + clair_ch.view() deepvariant_ch .join(clair_ch) .map { meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> @@ -93,7 +93,8 @@ workflow SMALL_VARIANT_CONSENSUS { return [ meta, vcfs, tbis] } .set{mixed_vcfs} - + + mixed_vcfs.view() if (var_keep_method == 'consensus') { mixed_vcfs .map{ meta, vcfs, tbis -> @@ -117,10 +118,7 @@ workflow SMALL_VARIANT_CONSENSUS { return [ meta, vcfs, tbis, bed ] } .set{ merge_input} - fasta - .join(fai) - .set{ fasta_fai } - BCFTOOLS_MERGE(merge_input, fasta_fai) + BCFTOOLS_MERGE(merge_input, fasta, fai ) BCFTOOLS_MERGE.out.vcf .set{vcf} BCFTOOLS_MERGE.out.index diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf index 159dd906..209a9995 100644 --- a/subworkflows/local/tumor_normal_happhase.nf +++ b/subworkflows/local/tumor_normal_happhase.nf @@ -5,9 +5,12 @@ include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index include { CLAIRS } from '../../modules/local/clairs/main.nf' include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat' include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort' -include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' -include { DEEPSOMATIC } from '../../subworkflows/local/deepsomatic.nf' -include { SMALL_VARIANT_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' + +include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' +include { DEEPSOMATIC } from '../../subworkflows/local/deepsomatic.nf' +include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' +include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' + workflow TUMOR_NORMAL_HAPPHASE { @@ 
-138,7 +141,7 @@ workflow TUMOR_NORMAL_HAPPHASE { .mix(deepvariant_ch) .set{mixed_vcfs} - SMALL_VARIANT_CONSENSUS( + GERMLINE_CONSENSUS( mixed_vcfs, fasta, fai, @@ -148,10 +151,8 @@ workflow TUMOR_NORMAL_HAPPHASE { // Add germline vcf to normal bams // remove clair3 model information - SMALL_VARIANT_CONSENSUS.out.vcf.view() - normal_bams.view() normal_bams - .join(SMALL_VARIANT_CONSENSUS.out.vcf) + .join(GERMLINE_CONSENSUS.out.vcf) .map { meta, bam, bai, _clair3_model, _platform, vcf -> def svs = [] def mods = [] @@ -160,7 +161,7 @@ workflow TUMOR_NORMAL_HAPPHASE { .set{ normal_bams_germlinevcf } // [meta, bam, bai, germline_vcf, [], []] -- svs and mods are empty placeholders for LONGPHASE_PHASE input - SMALL_VARIANT_CONSENSUS.out.vcf + GERMLINE_CONSENSUS.out.vcf .map { meta, vcf -> def extra = [] return [meta, vcf, extra] From 57472838aaec47f5f0736fd8a70f6005d5f49c28 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 17 Mar 2026 11:34:45 +0100 Subject: [PATCH 12/36] channel revision --- conf/modules.config | 6 +- modules/local/clair3/main.nf | 15 ++-- modules/local/clairs/main.nf | 2 + modules/local/clairsto/main.nf | 2 + modules/nf-core/bcftools/isec/main.nf | 13 +++- .../nf-core/deepvariant/makeexamples/main.nf | 1 + .../deepvariant/postprocessvariants/main.nf | 1 + nextflow.config | 3 +- subworkflows/local/small_variant_consensus.nf | 77 +++++++++++++++---- 9 files changed, 91 insertions(+), 29 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 1f2ddd84..b518a7bc 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -118,7 +118,7 @@ process { withName: '.*:BCFTOOLS_ISEC' { ext.prefix = { "${meta.id}_isec" } ext.args ={ - "-n=2 -c all -Oz" + "-Oz" } publishDir = [ enabled: false @@ -358,14 +358,14 @@ process { // Small variant calling processes // withName: '.*:BCFTOOLS_CONCAT' { - ext.args = '--output-type z -a' + ext.args = {'-Oz -a -W=tbi'} publishDir = [ enabled: false ] } withName: '.*:BCFTOOLS_SORT' { ext.prefix = { 
"${meta.id}_sorted" } - ext.arge = '--output-type z' + ext.arge = {'-Oz'} publishDir = [ enabled: false ] diff --git a/modules/local/clair3/main.nf b/modules/local/clair3/main.nf index 33ef8609..ab398bab 100644 --- a/modules/local/clair3/main.nf +++ b/modules/local/clair3/main.nf @@ -25,15 +25,18 @@ process CLAIR3 { script: def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + """ run_clair3.sh \\ - --bam_fn=$bam \\ - --ref_fn=$reference \\ - --threads=$task.cpus \\ + --bam_fn=${bam} \\ + --ref_fn=${reference} \\ + --threads=${task.cpus} \\ --output=. \\ - --platform=$platform \\ - --model=$model \\ - $args + --platform=${platform} \\ + --model=${model} \\ + --sample_name=${prefix} \\ + ${args} """ stub: diff --git a/modules/local/clairs/main.nf b/modules/local/clairs/main.nf index d683c777..4342e401 100644 --- a/modules/local/clairs/main.nf +++ b/modules/local/clairs/main.nf @@ -20,6 +20,7 @@ process CLAIRS { task.ext.when == null || task.ext.when script: + prefix = task.ext.prefix ?: "${meta.id}" def args = task.ext.args ?: '' """ @@ -29,6 +30,7 @@ process CLAIRS { --ref_fn $reference \\ --threads $task.cpus \\ --platform $model \\ + --sample_name ${prefix} \\ --output_dir . \\ --output_prefix snvs \\ $args diff --git a/modules/local/clairsto/main.nf b/modules/local/clairsto/main.nf index 9071487d..2d5b07ec 100644 --- a/modules/local/clairsto/main.nf +++ b/modules/local/clairsto/main.nf @@ -28,6 +28,7 @@ process CLAIRSTO { script: def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" def conda_prefix = workflow.containerEngine == 'singularity' ? '--conda_prefix /opt/micromamba/envs/clairs-to' : '' def gnomad_arg = gnomad ?: 'gnomad.r2.1.af-ge-0.001.sites.vcf.gz' def dbSNP_arg = dbSNP ?: 'dbsnp.b138.non-somatic.sites.vcf.gz' @@ -41,6 +42,7 @@ process CLAIRSTO { --platform $model \\ --threads $task.cpus \\ --output_dir . 
\\ + --sample_name ${prefix} \\ --panel_of_normals "${gnomad_arg},${dbSNP_arg},${onekgenomes_arg},${colors_arg}" \\ --panel_of_normals_require_allele_matching 'True,True,False,False' \\ $conda_prefix \\ diff --git a/modules/nf-core/bcftools/isec/main.nf b/modules/nf-core/bcftools/isec/main.nf index 1536c523..2bbf6263 100644 --- a/modules/nf-core/bcftools/isec/main.nf +++ b/modules/nf-core/bcftools/isec/main.nf @@ -12,10 +12,15 @@ process BCFTOOLS_ISEC { output: tuple val(meta), path("${prefix}", type: "dir"), emit: results - tuple val(meta), path("${prefix}/0000.vcf.gz"), emit: deepvar_style_consensus_vcf - tuple val(meta), path("${prefix}/0000.vcf.gz.tbi"), emit: deepvar_style_consensus_tbi - tuple val(meta), path("${prefix}/0001.vcf.gz"), emit: clair3_style_consensus_vcf - tuple val(meta), path("${prefix}/0001.vcf.gz.tbi"), emit: clair3_style_consensus_tbi + tuple val(meta), path("${prefix}/0002.vcf.gz"), emit: deepvar_consensus_vcf + tuple val(meta), path("${prefix}/0002.vcf.gz.tbi"), emit: deepvar_consensus_tbi + tuple val(meta), path("${prefix}/0003.vcf.gz"), emit: clair_consensus_vcf + tuple val(meta), path("${prefix}/0003.vcf.gz.tbi"), emit: clair_consensus_tbi + tuple val(meta), path("${prefix}/0001.vcf.gz"), emit: clair_private_vcf + tuple val(meta), path("${prefix}/0001.vcf.gz.tbi"), emit: clair_private_tbi + tuple val(meta), path("${prefix}/0000.vcf.gz"), emit: deepvar_private_vcf + tuple val(meta), path("${prefix}/0000.vcf.gz.tbi"), emit: deepvar_private_tbi + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools when: diff --git a/modules/nf-core/deepvariant/makeexamples/main.nf b/modules/nf-core/deepvariant/makeexamples/main.nf index 77d2f331..d553e980 100644 --- a/modules/nf-core/deepvariant/makeexamples/main.nf +++ b/modules/nf-core/deepvariant/makeexamples/main.nf @@ -36,6 +36,7 @@ process DEEPVARIANT_MAKEEXAMPLES { --mode calling \\ --ref "${fasta}" \\ 
--reads "${input}" \\ + --sample_name ${prefix} \\ --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ --gvcf "./${prefix}.gvcf.tfrecord@${task.cpus}.gz" \\ ${regions} \\ diff --git a/modules/nf-core/deepvariant/postprocessvariants/main.nf b/modules/nf-core/deepvariant/postprocessvariants/main.nf index 0830f9ac..2a7e8fb6 100644 --- a/modules/nf-core/deepvariant/postprocessvariants/main.nf +++ b/modules/nf-core/deepvariant/postprocessvariants/main.nf @@ -64,6 +64,7 @@ process DEEPVARIANT_POSTPROCESSVARIANTS { --outfile "${prefix}.vcf.gz" \\ --nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\ --gvcf_outfile "${prefix}.g.vcf.gz" \\ + --sample_name ${prefix} \\ ${regions} \\ ${small_model_arg} \\ --cpus $task.cpus diff --git a/nextflow.config b/nextflow.config index 68c9fd9b..010ad79e 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,7 +13,8 @@ params { // Input options input = null - germline_var_keep = 'all' + germline_var_keep = 'consensus' + trust_caller = 'clair' // References diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf index 92399dcd..c82b3c7b 100644 --- a/subworkflows/local/small_variant_consensus.nf +++ b/subworkflows/local/small_variant_consensus.nf @@ -1,9 +1,10 @@ -include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main' -include { BCFTOOLS_ISEC } from '../../modules/nf-core/bcftools/isec/main' -include { BCFTOOLS_MERGE } from '../../modules/nf-core/bcftools/merge/main' -include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query/main' -include { BCFTOOLS_ANNOTATE } from '../../modules/nf-core/bcftools/annotate/main' +include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_ISEC } from '../../modules/nf-core/bcftools/isec/main' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query/main' +include { BCFTOOLS_ANNOTATE } from '../../modules/nf-core/bcftools/annotate/main' +include { 
BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main' @@ -105,23 +106,69 @@ workflow SMALL_VARIANT_CONSENSUS { } .set{isec_input} BCFTOOLS_ISEC(isec_input) - BCFTOOLS_ISEC.out.deepvar_style_consensus_vcf + + if (params.trust_caller = 'deepvariant') { + BCFTOOLS_ISEC.out.clair_consensus_vcf .set{vcf} - BCFTOOLS_ISEC.out.deepvar_style_consensus_tbi + BCFTOOLS_ISEC.out.clair_consensus_tbi .set{tbi} + } + if (params.trust_caller = 'clair') { + BCFTOOLS_ISEC.out.clair_consensus_vcf + .set{vcf} + BCFTOOLS_ISEC.out.clair_consensus_tbi + .set{tbi} + } + } else if (var_keep_method == 'all'){ + mixed_vcfs - .map{ meta, vcfs, tbis -> - def bed = [] - return [ meta, vcfs, tbis, bed ] - } - .set{ merge_input} - BCFTOOLS_MERGE(merge_input, fasta, fai ) - BCFTOOLS_MERGE.out.vcf + .map{ meta, vcfs, tbis -> + def file = [] + def target = [] + def regions = [] + return [meta, vcfs, tbis, file, target, regions] + } + .set{isec_input} + + BCFTOOLS_ISEC(isec_input) + + if (params.trust_caller = 'deepvariant') { + BCFTOOLS_ISEC.out.deepvar_consensus_vcf + .join(BCFTOOLS_ISEC.out.deepvar_consensus_tbi) + .join(BCFTOOLS_ISEC.out.clair_private_vcf) + .join(BCFTOOLS_ISEC.out.clair_private_tbi) + .map{ meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> + return[meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]] + } + .set{concat_input} + BCFTOOLS_CONCAT(concat_input) + BCFTOOLS_CONCAT.out.vcf + .join(BCFTOOLS_CONCAT.out.tbi) + .set{concat_out} + } + + else if (params.trust_caller = 'clair') { + BCFTOOLS_ISEC.out.deepvar_private_vcf + .join(BCFTOOLS_ISEC.out.deepvar_private_tbi) + .join(BCFTOOLS_ISEC.out.clair_consensus_vcf) + .join(BCFTOOLS_ISEC.out.clair_consensus_tbi) + .map{ meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> + return[meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]] + } + .set{concat_input} + BCFTOOLS_CONCAT(concat_input) + BCFTOOLS_CONCAT.out.vcf + 
.join(BCFTOOLS_CONCAT.tbi) + .set{concat_out} + } + concat_out.view() + BCFTOOLS_SORT(concat_out) + BCFTOOLS_SORT.out.vcf .set{vcf} - BCFTOOLS_MERGE.out.index + BCFTOOLS_SORT.out.tbi .set{tbi} } From f676fd43535538c53911c2a235ad59b15db1ad00 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 17 Mar 2026 13:10:59 +0100 Subject: [PATCH 13/36] channel restructure --- conf/modules.config | 2 +- .../local/deepsomatic/makeexamples/main.nf | 2 +- nextflow.config | 3 +- nextflow_schema.json | 24 ++++++++++++ subworkflows/local/small_variant_consensus.nf | 6 --- subworkflows/local/tumor_normal_happhase.nf | 31 +++++++++++++++ subworkflows/local/tumor_only_happhase.nf | 39 ++++++++++++++++--- 7 files changed, 93 insertions(+), 14 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index b518a7bc..5e54217c 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -365,7 +365,7 @@ process { } withName: '.*:BCFTOOLS_SORT' { ext.prefix = { "${meta.id}_sorted" } - ext.arge = {'-Oz'} + ext.args = {'-Oz -W=tbi'} publishDir = [ enabled: false ] diff --git a/modules/local/deepsomatic/makeexamples/main.nf b/modules/local/deepsomatic/makeexamples/main.nf index f1b148b4..206e497c 100644 --- a/modules/local/deepsomatic/makeexamples/main.nf +++ b/modules/local/deepsomatic/makeexamples/main.nf @@ -36,7 +36,7 @@ process DEEPSOMATIC_MAKEEXAMPLES { --ref "${fasta}" \\ --reads_tumor "${tumor_input}" \\ ${normalReadsArg} \\ - --sample_name_tumor "${prefix}_tumor" \\ + --sample_name_tumor "${prefix}" \\ ${normalSampleArg} \\ --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ --gvcf "./${prefix}.gvcf.tfrecord@${task.cpus}.gz" \\ diff --git a/nextflow.config b/nextflow.config index 010ad79e..fb67ff05 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,7 +14,8 @@ params { input = null germline_var_keep = 'consensus' - trust_caller = 'clair' + somatic_var_keep = 'all' + trust_caller = 'deepvariant' // References diff --git a/nextflow_schema.json 
b/nextflow_schema.json index b25ea2ee..f22408bc 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -43,6 +43,30 @@ } } }, + "small_variant_calling_options": { + "title": "options for small variant calling", + "type": "object", + "properties": { + "trust_caller": { + "type": "string", + "description": "specifies which variant caller format to use when both clair and deepvariant call the same variant", + "default": "deepvariant", + "enum": ["deepvariant", "clair"] + }, + "germline_var_keep": { + "type": "string", + "description": "specifies which germline variants are used for phasing and annotation. Must be [consensus, all,deepvariant,clair]", + "default": "consensus", + "enum": ["consensus", "all","deepvariant","clair"] + }, + "somatic_var_keep": { + "type": "string", + "description": "specifies which somatic variants are used for phasing and annotation. Must be [consensus, all,deepvariant,clair]", + "default": "all", + "enum": ["consensus", "all", "deepvariant","clair"] + } + } + }, "reference_genome_options": { "title": "Reference genome options", "type": "object", diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf index c82b3c7b..266b52a5 100644 --- a/subworkflows/local/small_variant_consensus.nf +++ b/subworkflows/local/small_variant_consensus.nf @@ -84,8 +84,6 @@ workflow SMALL_VARIANT_CONSENSUS { return [ new_meta, vcfs, tbi] } .set{deepvariant_ch} - deepvariant_ch.view() - clair_ch.view() deepvariant_ch .join(clair_ch) .map { meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> @@ -95,7 +93,6 @@ workflow SMALL_VARIANT_CONSENSUS { } .set{mixed_vcfs} - mixed_vcfs.view() if (var_keep_method == 'consensus') { mixed_vcfs .map{ meta, vcfs, tbis -> @@ -146,7 +143,6 @@ workflow SMALL_VARIANT_CONSENSUS { .set{concat_input} BCFTOOLS_CONCAT(concat_input) BCFTOOLS_CONCAT.out.vcf - .join(BCFTOOLS_CONCAT.out.tbi) .set{concat_out} } @@ -161,10 +157,8 @@ workflow SMALL_VARIANT_CONSENSUS { 
.set{concat_input} BCFTOOLS_CONCAT(concat_input) BCFTOOLS_CONCAT.out.vcf - .join(BCFTOOLS_CONCAT.tbi) .set{concat_out} } - concat_out.view() BCFTOOLS_SORT(concat_out) BCFTOOLS_SORT.out.vcf .set{vcf} diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf index 209a9995..8ef04aaa 100644 --- a/subworkflows/local/tumor_normal_happhase.nf +++ b/subworkflows/local/tumor_normal_happhase.nf @@ -326,13 +326,44 @@ workflow TUMOR_NORMAL_HAPPHASE { BCFTOOLS_SORT ( BCFTOOLS_CONCAT.out.vcf ) + BCFTOOLS_SORT.out.vcf.view() + BCFTOOLS_SORT.out.tbi.view() + + DEEPSOMATIC.out.vcf + .join(DEEPSOMATIC.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepsomatic'] + return [new_meta, vcf, tbi] + } + .set{deepsomatic_ch} BCFTOOLS_SORT.out.vcf + .join(BCFTOOLS_SORT.out.tbi) + .map { meta, vcf , tbi -> + def new_meta = meta + [caller:'clairs'] + return [new_meta, vcf, tbi] + } + .set{clairs_ch} + // [meta,deepvar_vcf,deepvar_index,clair3_vcf,clair3_index] + clairs_ch.view() + clairs_ch + .mix(deepsomatic_ch) + .set{mixed_somatic_vcfs} + mixed_somatic_vcfs.view() + SOMATIC_CONSENSUS( + mixed_somatic_vcfs, + fasta, + fai, + params.somatic_var_keep + ) + + SOMATIC_CONSENSUS.out.vcf .map { meta, vcf -> def extra = [] return [meta, vcf, extra] } .set { somatic_vep } + // [meta, sorted_clairs_vcf, []] -- somatic small variants (SNV+indel merged) for VEP annotation emit: diff --git a/subworkflows/local/tumor_only_happhase.nf b/subworkflows/local/tumor_only_happhase.nf index 48b25fed..c2bdf35a 100644 --- a/subworkflows/local/tumor_only_happhase.nf +++ b/subworkflows/local/tumor_only_happhase.nf @@ -5,7 +5,9 @@ include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/hapl include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' include { DEEPSOMATIC } from '../../subworkflows/local/deepsomatic.nf' -include { 
SMALL_VARIANT_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' + +include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' +include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' workflow TUMOR_ONLY_HAPPHASE { @@ -115,16 +117,43 @@ workflow TUMOR_ONLY_HAPPHASE { .mix(deepvariant_ch) .set{mixed_vcfs} - SMALL_VARIANT_CONSENSUS( + GERMLINE_CONSENSUS( mixed_vcfs, fasta, fai, params.germline_var_keep ) + + VCFSPLIT.out.somatic_vcf + .join(VCFSPLIT.out.somatic_tbi) + .map { meta, vcf, tbi -> + def new_meta = meta + [caller:'clairs-to'] + return [ new_meta, vcf, tbi] + } + .set{clairsto_somatic_ch} + + DEEPSOMATIC.out.vcf + .join(DEEPSOMATIC.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepsomatic'] + return [new_meta, vcf, tbi] + } + .set{deepsomatic_ch} + + clairsto_somatic_ch + .mix(deepsomatic_ch) + .set{mixed_somatic_vcfs} + + SOMATIC_CONSENSUS( + mixed_somatic_vcfs, + fasta, + fai, + params.somatic_var_keep + ) // Add the nonsomatic vcf info // remove model info tumor_bams - .join(SMALL_VARIANT_CONSENSUS.out.vcf) + .join(GERMLINE_CONSENSUS.out.vcf) .map{ meta, bam, bai, _model, snps -> def svs = [] def mods = [] @@ -133,7 +162,7 @@ workflow TUMOR_ONLY_HAPPHASE { .set{ tumor_bams_germlinevcf } // [meta, bam, bai, nonsomatic_vcf, [], []] -- non-somatic variants used for phasing; svs and mods are empty placeholders for LONGPHASE_PHASE input - VCFSPLIT.out.somatic_vcf + SOMATIC_CONSENSUS.out.vcf .map { meta, vcf -> def extra = [] return [meta,vcf, extra] @@ -141,7 +170,7 @@ workflow TUMOR_ONLY_HAPPHASE { .set { somatic_vep } // [meta, somatic_vcf, []] -- PASS (somatic) variants for VEP annotation - VCFSPLIT.out.germline_vcf + GERMLINE_CONSENSUS.out.vcf .map { meta, vcf -> def extra = [] return [meta,vcf, extra] From 43ca8f8600d886113fcf997120b93879c80adbb1 Mon Sep 17 00:00:00 2001 From: 
robert-a-forsyth Date: Tue, 17 Mar 2026 14:03:48 +0100 Subject: [PATCH 14/36] improve clairsto pon handling --- modules/local/clairsto/main.nf | 16 +++------ nextflow.config | 2 ++ subworkflows/local/tumor_only_happhase.nf | 14 +++----- workflows/lrsomatic.nf | 40 ++++++++++++++++------- 4 files changed, 39 insertions(+), 33 deletions(-) diff --git a/modules/local/clairsto/main.nf b/modules/local/clairsto/main.nf index 2d5b07ec..04e73819 100644 --- a/modules/local/clairsto/main.nf +++ b/modules/local/clairsto/main.nf @@ -8,13 +8,9 @@ process CLAIRSTO { 'docker.io/hkubal/clairs-to:v0.4.2' }" input: - tuple val(meta), path(tumor_bam), path(tumor_bai), val(model) + tuple val(meta), path(tumor_bam), path(tumor_bai), val(model), path(pon_vcfs), val(pon_flags) tuple val(meta2), path(reference) tuple val(meta3), path(index) - path(dbSNP) - path(colors) - path(onekgenomes) - path(gnomad) output: tuple val(meta), path("indel.vcf.gz"), emit: indel_vcf @@ -30,10 +26,8 @@ process CLAIRSTO { def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def conda_prefix = workflow.containerEngine == 'singularity' ? '--conda_prefix /opt/micromamba/envs/clairs-to' : '' - def gnomad_arg = gnomad ?: 'gnomad.r2.1.af-ge-0.001.sites.vcf.gz' - def dbSNP_arg = dbSNP ?: 'dbsnp.b138.non-somatic.sites.vcf.gz' - def onekgenomes_arg = onekgenomes ?: '1000g-pon.sites.vcf.gz' - def colors_arg = colors ?: 'colors-pon.sites.vcf.gz' + def pon_string = pon_vcfs.join(',') + def flags_string = pon_flags.join(',') """ /opt/bin/run_clairs_to \ @@ -43,8 +37,8 @@ process CLAIRSTO { --threads $task.cpus \\ --output_dir . 
\\ --sample_name ${prefix} \\ - --panel_of_normals "${gnomad_arg},${dbSNP_arg},${onekgenomes_arg},${colors_arg}" \\ - --panel_of_normals_require_allele_matching 'True,True,False,False' \\ + --panel_of_normals ${pon_string} \\ + --panel_of_normals_require_allele_matching ${flags_string} \\ $conda_prefix \\ $args \\ """ diff --git a/nextflow.config b/nextflow.config index fb67ff05..03531fb9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -17,6 +17,8 @@ params { somatic_var_keep = 'all' trust_caller = 'deepvariant' + pon_vcfs = null + pon_flags = null // References genome = null diff --git a/subworkflows/local/tumor_only_happhase.nf b/subworkflows/local/tumor_only_happhase.nf index c2bdf35a..e68d4c06 100644 --- a/subworkflows/local/tumor_only_happhase.nf +++ b/subworkflows/local/tumor_only_happhase.nf @@ -16,10 +16,7 @@ workflow TUMOR_ONLY_HAPPHASE { tumor_bams fasta fai - dbsnp - colors - onekgenomes - gnomad + pon_channel main: @@ -71,15 +68,12 @@ workflow TUMOR_ONLY_HAPPHASE { // // call somatic/non-somatic variants // (* not called as germline * just non-somatic) - + tumor_bams.combine(pon_channel).set{new_combine} + new_combine.view() CLAIRSTO ( - tumor_bams, + new_combine, fasta, fai, - dbsnp, - colors, - onekgenomes, - gnomad ) CLAIRSTO.out.indel_vcf diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index fc61e9c6..6f296c9d 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -93,10 +93,33 @@ workflow LRSOMATIC { params.bed_file = getGenomeAttribute('bed_file') params.vep_genome = getGenomeAttribute('vep_genome') params.vep_species = getGenomeAttribute('vep_species') - params.dbsnp = getGenomeAttribute('dbsnp') - params.colors = getGenomeAttribute('colors') - params.onekgenomes = getGenomeAttribute('onekgenomes') - params.gnomad = getGenomeAttribute('gnomad') + + if (params.pons_vcfs != null) { + pon_files = params.pon_vcfs.collect { file(it) } + pon_flags = params.pon_flags + } + else { + pon_files = [ + 
getGenomeAttribute('gnomad'), + getGenomeAttribute('dbsnp'), + getGenomeAttribute('onekgenomes'), + getGenomeAttribute('colors'), + ] + pon_flags = [ + "True", + "True", + "False", + "False" + ] + } + if (pon_files.size() != pon_flags.size()) { + error "PoN VCFs and allele flags must have same length" + } + Channel + .of( tuple(pon_files, pon_flags) ) + .set { pon_channel } + + pon_channel.view() ch_versions = channel.empty() ch_multiqc_files = channel.empty() @@ -404,19 +427,12 @@ workflow LRSOMATIC { // // Phasing/haplotagging for tumor only samples - dbsnp = file(params.dbsnp) - colors = file(params.colors) - onekgenomes = file(params.onekgenomes) - gnomad = file(params.gnomad) TUMOR_ONLY_HAPPHASE ( branched_minimap.tumor_only, ch_fasta, ch_fai, - dbsnp, - colors, - onekgenomes, - gnomad + pon_channel ) germline_vep = TUMOR_NORMAL_HAPPHASE.out.germline_vep.mix(TUMOR_ONLY_HAPPHASE.out.germline_vep) From 5fda74420daa47456fdfda3a90b04984d92381ef Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Mon, 23 Mar 2026 16:05:55 +0100 Subject: [PATCH 15/36] migrate some functions to different workflows to improve readability --- conf/modules.config | 2 +- log.txt | 0 modules/local/longphase/modcall/main.nf | 61 +++ modules/local/vcfsplit/main.nf | 4 +- modules/nf-core/longphase/phase/main.nf | 10 +- nextflow.config | 1 + .../local/paired/paired_smallvar_germline.nf | 144 +++++++ .../local/paired/paired_smallvar_somatic.nf | 126 ++++++ subworkflows/local/phasing_haplotyping.nf | 235 +++++++++++ subworkflows/local/small_variant_consensus.nf | 14 +- subworkflows/local/tumor_normal_happhase.nf | 375 ------------------ .../local/tumor_only/tumoronly_smallvar.nf | 207 ++++++++++ subworkflows/local/tumor_only_happhase.nf | 252 ------------ workflows/lrsomatic.nf | 160 ++++++-- 14 files changed, 913 insertions(+), 678 deletions(-) create mode 100644 log.txt create mode 100644 modules/local/longphase/modcall/main.nf create mode 100644 
subworkflows/local/paired/paired_smallvar_germline.nf create mode 100644 subworkflows/local/paired/paired_smallvar_somatic.nf create mode 100644 subworkflows/local/phasing_haplotyping.nf delete mode 100644 subworkflows/local/tumor_normal_happhase.nf create mode 100644 subworkflows/local/tumor_only/tumoronly_smallvar.nf delete mode 100644 subworkflows/local/tumor_only_happhase.nf diff --git a/conf/modules.config b/conf/modules.config index 5e54217c..397e73aa 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -260,7 +260,7 @@ process { // withName: '.*:MINIMAP2_ALIGN' { - ext.prefix = { "${meta.id}_mapped" } + ext.prefix = { "${meta.id}_${meta.type}_mapped" } ext.args = { [ meta.platform == 'pb' ? ( params.minimap2_pb_model ? "-ax $params.minimap2_pb_model" : "-ax map-hifi" ) : diff --git a/log.txt b/log.txt new file mode 100644 index 00000000..e69de29b diff --git a/modules/local/longphase/modcall/main.nf b/modules/local/longphase/modcall/main.nf new file mode 100644 index 00000000..dd04482e --- /dev/null +++ b/modules/local/longphase/modcall/main.nf @@ -0,0 +1,61 @@ +process LONGPHASE_MODCALL { + tag "$meta.id" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
+ 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b0/b0184a9a36d8612fbae38bbaad7b52f03b815ad17673740e107cf1f267a1f15d/data': + 'community.wave.seqera.io/library/htslib_longphase:3071e61356fc25a4' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + + output: + tuple val(meta), path("*.vcf") , emit: mod_vcf + tuple val(meta), path("*.log") , emit: log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + longphase \\ + modcall \\ + $args \\ + --threads $task.cpus \\ + -o ${prefix} \\ + --reference ${fasta} \\ + -b ${bam} \\ + --out-prefix ${prefix} + + if [ -f "${prefix}.out" ]; then + mv ${prefix}.out ${prefix}.log + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def log = args.contains('--log') ? 
"touch ${prefix}.log" : '' + """ + touch ${prefix}.vcf + ${log} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ +} diff --git a/modules/local/vcfsplit/main.nf b/modules/local/vcfsplit/main.nf index df7ce318..5c9bb05f 100644 --- a/modules/local/vcfsplit/main.nf +++ b/modules/local/vcfsplit/main.nf @@ -31,8 +31,8 @@ process VCFSPLIT { bcftools concat -a -Oz -o somatic.vcf.gz indels_pass.vcf.gz snv_pass.vcf.gz tabix -p vcf somatic.vcf.gz - bcftools view -i 'FILTER="NonSomatic"' $indel_vcf | bgzip -c > indels_filtered.vcf.gz - bcftools view -i 'FILTER="NonSomatic"' $snv_vcf | bgzip -c > snv_filtered.vcf.gz + bcftools view -i 'FILTER="NonSomatic" | Verdict_Germline' $indel_vcf | bgzip -c > indels_filtered.vcf.gz + bcftools view -i 'FILTER="NonSomatic" | Verdict_Germline' $snv_vcf | bgzip -c > snv_filtered.vcf.gz tabix -p vcf indels_filtered.vcf.gz tabix -p vcf snv_filtered.vcf.gz bcftools concat -a -Oz -o germline_tmp.vcf.gz indels_filtered.vcf.gz snv_filtered.vcf.gz diff --git a/modules/nf-core/longphase/phase/main.nf b/modules/nf-core/longphase/phase/main.nf index 3b942972..5a20381f 100644 --- a/modules/nf-core/longphase/phase/main.nf +++ b/modules/nf-core/longphase/phase/main.nf @@ -49,7 +49,15 @@ process LONGPHASE_PHASE { $args2 \\ ${prefix}*.vcf - tabix -p vcf ${prefix}*.vcf.gz + tabix -p vcf ${prefix}.vcf.gz + + if [ -f ${prefix}_SV.vcf.gz ]; then + tabix -p vcf ${prefix}_SV.vcf.gz + fi + + if [ -f ${prefix}_mod.vcf.gz ]; then + tabix -p vcf ${prefix}_mod.vcf.gz + fi cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/nextflow.config b/nextflow.config index 03531fb9..1cef0a68 100644 --- a/nextflow.config +++ b/nextflow.config @@ -47,6 +47,7 @@ params { skip_fiber = false skip_m6a = false skip_vep = false + skip_modcall = false // minimap2 options minimap2_ont_model = null diff --git a/subworkflows/local/paired/paired_smallvar_germline.nf 
b/subworkflows/local/paired/paired_smallvar_germline.nf new file mode 100644 index 00000000..4ad2d727 --- /dev/null +++ b/subworkflows/local/paired/paired_smallvar_germline.nf @@ -0,0 +1,144 @@ +// IMPORT MODULES +include { CLAIR3 } from '../../../modules/local/clair3/main.nf' + +// IMPORT SUBWORKFLOWS +include { DEEPVARIANT } from '../../../subworkflows/nf-core/deepvariant/main.nf' +include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' + +workflow PAIRED_SMALLVAR_GERMLINE { + + take: + normal_bams // [ meta, normal_bam, normal_bai ] + fasta + fai + clair3_models + + main: + ch_versions = channel.empty() + germline_vcf = channel.empty() + germline_tbi = channel.empty() + // COMBINE NORMAL BAMS WITH DOWNLOADED CLAIR3 MODELS + if(params.germline_var_keep != 'deepvariant') { + + clair3_models + .map{ meta, file -> + def clair3_model_name = meta.id + return [meta, clair3_model_name, file] + } + .set{clair3_models} + normal_bams + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, meta.clair3_model, bam, bai ] + } + .set { normal_bams_model } + + // CLAIR3 + normal_bams_model + .combine(clair3_models,by:1) + .map {_clair3_model, meta_bam, bam, bai, _meta_model, model -> + def platform = (meta_bam.platform == 'pb') ? 
'hifi' : meta_bam.platform + return [meta_bam, bam, bai, model, platform] + } + .set{ clair3_input_ch } + + CLAIR3 ( + clair3_input_ch, + fasta, + fai + ) + + CLAIR3.out.vcf + .join(CLAIR3.out.tbi) + .map { meta, vcf , tbi -> + def new_meta = meta + [caller:'clair3'] + return [new_meta, vcf, tbi] + } + .set{clair3_ch} + } + // DEEPVARIANT + if(params.germline_var_keep != 'clair') { + + normal_bams + .map {meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + def intervals = [] + return [new_meta, bam, bai, intervals] + } + .set{deepvariant_input_ch} + + DEEPVARIANT ( + deepvariant_input_ch, + fasta, + fai, + [[:],[]], + [[:],[]] + ) + + DEEPVARIANT.out.vcf + .join(DEEPVARIANT.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepvariant'] + return [new_meta, vcf, tbi] + } + .set{deepvariant_ch} + } + // COMBINE GERMLINE VARIATION + if (params.germline_var_keep != 'clair' && params.germline_var_keep != 'deepvariant' ) { + clair3_ch + .mix(deepvariant_ch) + .set{combined_germline_ch} + + GERMLINE_CONSENSUS( + combined_germline_ch, + fasta, + fai, + params.germline_var_keep + ) + GERMLINE_CONSENSUS.out.vcf + .join(GERMLINE_CONSENSUS.out.tbi) + .set{ germline_vcf } + } + else if (params.germline_var_keep == 'clair') { + clair3_ch + .set{germline_vcf} + } + else if (params.germline_var_keep == 'deepvariant') { + deepvariant_ch + .set{germline_vcf} + } + + germline_vcf + .map{ meta, vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{germline_vcf} + + emit: + germline_vcf +} diff --git a/subworkflows/local/paired/paired_smallvar_somatic.nf b/subworkflows/local/paired/paired_smallvar_somatic.nf new file mode 100644 index 00000000..e7cad038 --- /dev/null +++ 
b/subworkflows/local/paired/paired_smallvar_somatic.nf @@ -0,0 +1,126 @@ +// IMPORT MODULES +include { CLAIRS } from '../../../modules/local/clairs/main.nf' +include { BCFTOOLS_CONCAT } from '../../../modules/nf-core/bcftools/concat' +include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort' + +// IMPORT SUBWORKFLOWS +include { DEEPSOMATIC } from '../../../subworkflows/local/deepsomatic.nf' +include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' + +workflow PAIRED_SMALLVAR_SOMATIC { + + take: + tumor_normal_bams // [ meta, tumor_bam, tumor_bai, normal_hapbam, normal_bai ] + fasta + fai + + main: + ch_versions = channel.empty() + somatic_vcf = channel.empty() + somatic_tbi = channel.empty() + + // CLAIRS + if(params.somatic_var_keep != 'deepvariant') { + tumor_normal_bams + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return[meta , tumor_bam, tumor_bai, normal_bam, normal_bai, meta.clairS_model] + } + .set { clairs_input } + + CLAIRS ( + clairs_input, + fasta, + fai + ) + + // CONCAT CLAIRS INDEL AND SNV OUTPUT + + CLAIRS.out.vcfs + .join(CLAIRS.out.tbi) + .set{clairs_out} + + BCFTOOLS_CONCAT ( + clairs_out + ) + + BCFTOOLS_SORT ( + BCFTOOLS_CONCAT.out.vcf + ) + + BCFTOOLS_SORT.out.vcf + .join(BCFTOOLS_SORT.out.tbi) + .map { meta, vcf , tbi -> + def new_meta = meta + [caller:'clairs'] + return [new_meta, vcf, tbi] + } + .set{clairs_ch} + } + // DEEPSOMATIC + + if(params.somatic_var_keep != 'clair') { + + tumor_normal_bams + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> + return [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + } + .set{ deepsomatic_input } + + DEEPSOMATIC ( + deepsomatic_input, + [[:],[]], + fasta, + fai, + [[:],[]] + ) + + DEEPSOMATIC.out.vcf + .join(DEEPSOMATIC.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepsomatic'] + return [new_meta, vcf, tbi] + } + .set{deepsomatic_ch} + + } + // COMBINE SOMATIC
VARIATION + if (params.somatic_var_keep != 'clair' && params.somatic_var_keep != 'deepvariant' ) { + clairs_ch + .mix(deepsomatic_ch) + .set{combine_somatic_ch} + + SOMATIC_CONSENSUS( + combine_somatic_ch, + fasta, + fai, + params.somatic_var_keep + ) + + SOMATIC_CONSENSUS.out.vcf + .join(SOMATIC_CONSENSUS.out.tbi) + .set{ somatic_vcf } + } + else if (params.somatic_var_keep == 'clair') { + clairs_ch + .set{somatic_vcf} + } + else if (params.somatic_var_keep == 'deepvariant') { + deepsomatic_ch + .set{somatic_vcf} + } + somatic_vcf + .map{ meta, vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{somatic_vcf} + emit: + somatic_vcf +} diff --git a/subworkflows/local/phasing_haplotyping.nf b/subworkflows/local/phasing_haplotyping.nf new file mode 100644 index 00000000..272b5ef0 --- /dev/null +++ b/subworkflows/local/phasing_haplotyping.nf @@ -0,0 +1,235 @@ +// Import modules +include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main.nf' +include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' +include { LONGPHASE_MODCALL } from '../../modules/local/longphase/modcall/main.nf' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' +workflow PHASING_HAPLOTYPING { + take: + tumor_normal_bams // [meta, bam, bai] + germline_vcf + fasta + fai + + main: + + // SPLIT INTO PAIRED AND TUMOR ONLY + tumor_normal_bams + .branch { meta, _bams, _bai -> + paired: meta.paired_data + tumor_only: !meta.paired_data + } + .set { branched_bams } + + branched_bams.paired + .set{ paired_ch } + + branched_bams.tumor_only + .map { meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, bam, bai ] + } + .set{ tumor_only_ch } + + 
paired_ch + .branch { meta, _bam, _bai -> + normal: meta.type == "normal" + tumor: meta.type == "tumor" + } + .set {paired_ch_branched} + + paired_ch_branched.normal + .map { meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, bam, bai ] + } + .set{ paired_normal_ch } + + paired_ch_branched.tumor + .map { meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, bam, bai ] + } + .set{ paired_tumor_ch } + + tumor_only_ch + .mix(paired_normal_ch) + .set { longphase_modcall_input_ch } + + // MODCALL + + if (!params.skip_modcall) { + + LONGPHASE_MODCALL ( + longphase_modcall_input_ch, + fasta, + fai + ) + + } + // PHASING + if (!params.skip_modcall) { + longphase_modcall_input_ch + .join(germline_vcf) + .join(LONGPHASE_MODCALL.out.mod_vcf) + .map { meta, bam, bai, vcf, _tbi, mods-> + def svs = [] + return [ meta, bam, bai, vcf, svs, mods ] + } + .set{ longphase_phase_input_ch } + } + else { + longphase_modcall_input_ch + .join(germline_vcf) + .map { meta, bam, bai, vcf, _tbi-> + def svs = [] + def mods = [] + return [ meta, bam, bai, vcf, svs, mods ] + } + .set{ longphase_phase_input_ch } + } + LONGPHASE_PHASE ( + longphase_phase_input_ch, + fasta, + fai + ) + + LONGPHASE_PHASE.out.snv_vcf + .join(LONGPHASE_PHASE.out.snv_vcf_index) + .set{ phased_germline_vcf } + + // HAPLOTAGING + // remove type for merging + + + if(!params.skip_modcall) { + + LONGPHASE_MODCALL.out.mod_vcf + .map { meta, mods -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, mods ] + } + .set{modcall_vcf_ch} + + tumor_only_ch + .join(LONGPHASE_PHASE.out.snv_vcf) + .join(modcall_vcf_ch) + .map { 
meta, bam, bai, vcf, mods -> + def new_meta = meta + [type : "tumor"] + def svs = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ tumor_only_ch } + + paired_tumor_ch + .join(LONGPHASE_PHASE.out.snv_vcf) + .join(modcall_vcf_ch) + .map { meta, bam, bai, vcf, mods -> + def new_meta = meta + [type : "tumor"] + def svs = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ paired_tumor_ch } + + paired_normal_ch + .join(LONGPHASE_PHASE.out.snv_vcf) + .join(modcall_vcf_ch) + .map { meta, bam, bai, vcf, mods -> + def new_meta = meta + [type : "normal"] + def svs = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ paired_normal_ch } + + } + else { + + tumor_only_ch + .join(LONGPHASE_PHASE.out.snv_vcf) + .map { meta, bam, bai, vcf -> + def new_meta = meta + [type : "tumor"] + def svs = [] + def mods = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ tumor_only_ch } + + paired_tumor_ch + .join(LONGPHASE_PHASE.out.snv_vcf) + .map { meta, bam, bai, vcf -> + def new_meta = meta + [type : "tumor"] + def svs = [] + def mods = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ paired_tumor_ch } + + paired_normal_ch + .join(LONGPHASE_PHASE.out.snv_vcf) + .map { meta, bam, bai, vcf -> + def new_meta = meta + [type : "normal"] + def svs = [] + def mods = [] + return [new_meta, bam, bai, vcf, svs, mods] + } + .set{ paired_normal_ch } + + } + // MERGE ALL SAMPLES FOR HAPLOTAGGING: mix (not join) so every [meta+type, bam, bai, vcf, svs, mods] tuple is kept; join would only emit metas present in all three channels + tumor_only_ch + .mix(paired_tumor_ch) + .mix(paired_normal_ch) + .set {longphase_haplotag_input_ch} + + LONGPHASE_HAPLOTAG ( + longphase_haplotag_input_ch, + fasta, + fai + ) + + LONGPHASE_HAPLOTAG.out.bam + .set{ tumor_normal_hapbams_ch } + + SAMTOOLS_INDEX ( + tumor_normal_hapbams_ch + ) + tumor_normal_hapbams_ch + .join(SAMTOOLS_INDEX.out.bai) + .set{ tumor_normal_hapbams_ch } + + + emit: + tumor_normal_hapbams_ch + phased_germline_vcf +} \ No newline at end of file diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf index
266b52a5..823c38c9 100644 --- a/subworkflows/local/small_variant_consensus.nf +++ b/subworkflows/local/small_variant_consensus.nf @@ -84,6 +84,7 @@ workflow SMALL_VARIANT_CONSENSUS { return [ new_meta, vcfs, tbi] } .set{deepvariant_ch} + deepvariant_ch .join(clair_ch) .map { meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> @@ -102,15 +103,16 @@ workflow SMALL_VARIANT_CONSENSUS { return [meta, vcfs, tbis, file, target, regions] } .set{isec_input} + BCFTOOLS_ISEC(isec_input) - if (params.trust_caller = 'deepvariant') { - BCFTOOLS_ISEC.out.clair_consensus_vcf + if (params.trust_caller == 'deepvariant') { + BCFTOOLS_ISEC.out.deepvar_consensus_vcf .set{vcf} - BCFTOOLS_ISEC.out.clair_consensus_tbi + BCFTOOLS_ISEC.out.deepvar_consensus_tbi .set{tbi} } - if (params.trust_caller = 'clair') { + if (params.trust_caller == 'clair') { BCFTOOLS_ISEC.out.clair_consensus_vcf .set{vcf} BCFTOOLS_ISEC.out.clair_consensus_tbi @@ -132,7 +134,7 @@ workflow SMALL_VARIANT_CONSENSUS { BCFTOOLS_ISEC(isec_input) - if (params.trust_caller = 'deepvariant') { + if (params.trust_caller == 'deepvariant') { BCFTOOLS_ISEC.out.deepvar_consensus_vcf .join(BCFTOOLS_ISEC.out.deepvar_consensus_tbi) .join(BCFTOOLS_ISEC.out.clair_private_vcf) @@ -146,7 +148,7 @@ workflow SMALL_VARIANT_CONSENSUS { .set{concat_out} } - else if (params.trust_caller = 'clair') { + else if (params.trust_caller == 'clair') { BCFTOOLS_ISEC.out.deepvar_private_vcf .join(BCFTOOLS_ISEC.out.deepvar_private_tbi) .join(BCFTOOLS_ISEC.out.clair_consensus_vcf) diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf deleted file mode 100644 index 8ef04aaa..00000000 --- a/subworkflows/local/tumor_normal_happhase.nf +++ /dev/null @@ -1,375 +0,0 @@ -include { CLAIR3 } from '../../modules/local/clair3/main.nf' -include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main.nf' -include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' -include { 
SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' -include { CLAIRS } from '../../modules/local/clairs/main.nf' -include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat' -include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort' - -include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' -include { DEEPSOMATIC } from '../../subworkflows/local/deepsomatic.nf' -include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' -include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' - - - -workflow TUMOR_NORMAL_HAPPHASE { - take: - mixed_bams - fasta - fai - downloaded_clair3_models - - main: - - ch_versions = channel.empty() - tumor_normal_severus = channel.empty() - somatic_vep = channel.empty() - germline_vep = channel.empty() - - // Branch input bams in normal and tumour - mixed_bams - .branch{ meta, _bam, _bai -> - normal: meta.type == "normal" - tumor: meta.type == "tumor" - } - .set{ mixed_bams } - - // Get normal bams and add platform/model info for Clair3 usage - // remove type from so that information can be merged easier later - - downloaded_clair3_models - .map{ meta, file -> - def clair3_model = meta.id - return [meta, clair3_model, file] - } - .set{downloaded_clair3_models} - - mixed_bams.normal - .map{ meta, bam, bai -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - clair3_model: meta.clair3_model, - clairS_model: meta.clairS_model, - clairSTO_model: meta.clairSTO_model, - kinetics: meta.kinetics] - return [ new_meta, meta.clair3_model, bam, bai ] - } - .set { normal_bams_model } - // [meta, clair3_model_id, bam, bai] -- keyed by model ID for .combine() with downloaded_clair3_models - - normal_bams_model - .combine(downloaded_clair3_models,by:1) - .map {_clair3_model, meta_bam, bam, bai, 
_meta_model, model -> - def platform = (meta_bam.platform == 'pb') ? 'hifi' : meta_bam.platform - return [meta_bam, bam, bai, model, platform] - } - .set{ normal_bams } - // [meta, bam, bai, clair3_model_dir, platform] -- type excluded from meta; platform is "hifi" for PacBio - - /* - .map{ basecall_model, meta, bam, bai, meta2, model -> - def platform = (meta.platform == "pb") ? "hifi" : "ont" - return [meta, bam, bai, model, platform] - } - */ - - // Get tumour bams - // remove type from so that information can be merged easier later - mixed_bams.tumor - .map{ meta, bam, bai -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - clair3_model: meta.clair3_model, - clairS_model: meta.clairS_model, - clairSTO_model: meta.clairSTO_model, - kinetics: meta.kinetics] - return[new_meta, bam, bai] - } - .set{ tumor_bams } - // [meta, bam, bai] -- type excluded from meta for downstream groupTuple merge - - // - // MODULE: CLAIR3 - // small germline variant calling - - CLAIR3 ( - normal_bams, - fasta, - fai - ) - - normal_bams - .map {meta, bam, bai, _model, _platform -> - def intervals = [] - return [meta, bam, bai, intervals] - } - .set{deepvar_normal_bams} - - DEEPVARIANT ( - deepvar_normal_bams, - fasta, - fai, - [[:],[]], - [[:],[]] - ) - - DEEPVARIANT.out.vcf - .join(DEEPVARIANT.out.vcf_index) - .map{ meta, vcf, tbi -> - def new_meta = meta + [caller:'deepvariant'] - return [new_meta, vcf, tbi] - } - .set{deepvariant_ch} - - CLAIR3.out.vcf - .join(CLAIR3.out.tbi) - .map { meta, vcf , tbi -> - def new_meta = meta + [caller:'clair3'] - return [new_meta, vcf, tbi] - } - .set{clair3_ch} - // [meta,deepvar_vcf,deepvar_index,clair3_vcf,clair3_index] - - clair3_ch - .mix(deepvariant_ch) - .set{mixed_vcfs} - - GERMLINE_CONSENSUS( - mixed_vcfs, - fasta, - fai, - params.germline_var_keep - ) - - - // Add germline vcf to normal bams - // remove clair3 model information - normal_bams - 
.join(GERMLINE_CONSENSUS.out.vcf) - .map { meta, bam, bai, _clair3_model, _platform, vcf -> - def svs = [] - def mods = [] - return [meta, bam, bai, vcf, svs, mods] - } - .set{ normal_bams_germlinevcf } - // [meta, bam, bai, germline_vcf, [], []] -- svs and mods are empty placeholders for LONGPHASE_PHASE input - - GERMLINE_CONSENSUS.out.vcf - .map { meta, vcf -> - def extra = [] - return [meta, vcf, extra] - } - .set { germline_vep } - // [meta, clair3_vcf, []] -- germline small variants for VEP annotation - - // - // MODULE: LONGPHASE_PHASE - // - // Phase normals - - LONGPHASE_PHASE ( - normal_bams_germlinevcf, - fasta, - fai - ) - - ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) - - // Add phased vcf to normal bams - // Add type information back - // both are needed for mixing with the tumor bams - - normal_bams - .join(LONGPHASE_PHASE.out.snv_vcf) - .map { meta, bam, bai, _clair3_model, _platform, vcf -> - def new_meta = meta + [type: "normal"] - def svs = [] - def mods = [] - return[new_meta, bam, bai, vcf, svs, mods] - } - .set{ normal_bams } - // [meta+{type:"normal"}, bam, bai, phased_vcf, [], []] -- type re-added; svs and mods are empty placeholders for LONGPHASE_HAPLOTAG - - // Add phased vcf to tumour bams and type information - // mix with the normal bams - - tumor_bams - .join(LONGPHASE_PHASE.out.snv_vcf) - .map { meta, bam, bai, vcf -> - def new_meta = meta + [type: "tumor"] - def svs = [] - def mods = [] - return [new_meta, bam, bai, vcf, svs, mods] - } - .mix(normal_bams) - .set{ mixed_bams_vcf } - // [meta+{type}, bam, bai, phased_normal_vcf, [], []] -- tumor and normal items both carry the same phased normal VCF - - // - // MODULE: LONGPHASE_HAPLOTAG - // - - // haplotag tumor and normal bams with normal vcf files for both - LONGPHASE_HAPLOTAG ( - mixed_bams_vcf, - fasta, - fai - ) - - ch_versions = ch_versions.mix(LONGPHASE_HAPLOTAG.out.versions) - - // Get final tagged bams - LONGPHASE_HAPLOTAG.out.bam - .set{ mixed_hapbams } - // 
[meta+{type}, haplotagged_bam] - - // - // MODULE: SAMTOOLS_INDEX - // - // index the haplotaged bams - - SAMTOOLS_INDEX ( - mixed_hapbams - ) - - // Add index to channel - mixed_bams_vcf - .join(mixed_hapbams) - .join(SAMTOOLS_INDEX.out.bai) - .set{ mixed_hapbams } - // [meta+{type}, orig_bam, orig_bai, vcf, svs, mods, hapbam, hapbai] - - // Group everything back together in one channel - mixed_hapbams - .map { meta, _bam, _bai, _vcf, _snvs, _mods, hapbam, hapbai -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - clair3_model: meta.clair3_model, - clairS_model: meta.clairS_model, - clairSTO_model: meta.clairSTO_model, - kinetics: meta.kinetics] - return[new_meta, [[type: meta.type], hapbam], [[type: meta.type], hapbai]] - } - .groupTuple(size: 2) - .map{ meta, bam, bai -> - def normal_bam = bam[0][0].type == "normal" ? bam[0][1] : bam[1][1] - def tumor_bam = bam[0][0].type == "tumor" ? bam[0][1] : bam[1][1] - def normal_bai = bai[0][0].type == "normal" ? bai[0][1] : bai[1][1] - def tumor_bai = bai[0][0].type == "tumor" ? 
bai[0][1] : bai[1][1] - // Return channel - return [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] - } - .join(LONGPHASE_PHASE.out.snv_vcf) - .join(LONGPHASE_PHASE.out.snv_vcf_index) - .set{tumor_normal_severus} - // [meta, tumor_hapbam, tumor_bai, normal_hapbam, normal_bai, phased_vcf, phased_tbi] - - // Get ClairS input channel - tumor_normal_severus - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf, _tbi -> - return[meta , tumor_bam, tumor_bai, normal_bam, normal_bai, meta.clairS_model] - } - .set { clairs_input } - - tumor_normal_severus - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf, _tbi -> - return [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] - } - .set{ deepsomatic_input } - - - - DEEPSOMATIC ( - deepsomatic_input, - [[:],[]], - fasta, - fai, - [[:],[]] - ) - - // - // MODULE: CLAIRS - // - - CLAIRS ( - clairs_input, - fasta, - fai - ) - - CLAIRS.out.vcfs - .join(CLAIRS.out.tbi) - .set{clairs_out} - - // - // MODULE: BCFTOOLS_CONCAT - // - - BCFTOOLS_CONCAT ( - clairs_out - ) - - // - // MODULE: BCFTOOLS_SORT - // - - BCFTOOLS_SORT ( - BCFTOOLS_CONCAT.out.vcf - ) - BCFTOOLS_SORT.out.vcf.view() - BCFTOOLS_SORT.out.tbi.view() - - DEEPSOMATIC.out.vcf - .join(DEEPSOMATIC.out.vcf_index) - .map{ meta, vcf, tbi -> - def new_meta = meta + [caller:'deepsomatic'] - return [new_meta, vcf, tbi] - } - .set{deepsomatic_ch} - - BCFTOOLS_SORT.out.vcf - .join(BCFTOOLS_SORT.out.tbi) - .map { meta, vcf , tbi -> - def new_meta = meta + [caller:'clairs'] - return [new_meta, vcf, tbi] - } - .set{clairs_ch} - // [meta,deepvar_vcf,deepvar_index,clair3_vcf,clair3_index] - clairs_ch.view() - clairs_ch - .mix(deepsomatic_ch) - .set{mixed_somatic_vcfs} - mixed_somatic_vcfs.view() - SOMATIC_CONSENSUS( - mixed_somatic_vcfs, - fasta, - fai, - params.somatic_var_keep - ) - - SOMATIC_CONSENSUS.out.vcf - .map { meta, vcf -> - def extra = [] - return [meta, vcf, extra] - } - .set { somatic_vep } - - // [meta, sorted_clairs_vcf, []] -- somatic 
small variants (SNV+indel merged) for VEP annotation - - emit: - tumor_normal_severus - somatic_vep - germline_vep - versions = ch_versions - -} diff --git a/subworkflows/local/tumor_only/tumoronly_smallvar.nf b/subworkflows/local/tumor_only/tumoronly_smallvar.nf new file mode 100644 index 00000000..7db49724 --- /dev/null +++ b/subworkflows/local/tumor_only/tumoronly_smallvar.nf @@ -0,0 +1,207 @@ +// IMPORT MODULES +include { CLAIRSTO } from '../../../modules/local/clairsto/main.nf' +include { VCFSPLIT } from '../../../modules/local/vcfsplit/main.nf' + +// IMPORT SUBWORKFLOWS +include { DEEPVARIANT } from '../../../subworkflows/nf-core/deepvariant/main.nf' +include { DEEPSOMATIC } from '../../../subworkflows/local/deepsomatic.nf' +include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' +include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' + + +workflow TUMORONLY_SMALLVAR { + + take: + tumor_bams // [ meta, tumor_bams, tumor_bai ] + fasta + fai + pon_channel + + main: + + // empty channel emission + + ch_versions = channel.empty() + somatic_vcf = channel.empty() + germline_vcf = channel.empty() + somatic_tbi = channel.empty() + germline_tbi = channel.empty() + + // CLAIRS-TO (SOMATIC/NONGERMLINE VARIANT CALLING) -- RUN WHEN EITHER THE SOMATIC OR THE GERMLINE BRANCH KEEPS CLAIRS-TO CALLS + + if(params.somatic_var_keep != 'deepvariant' || params.germline_var_keep != 'deepvariant') { + tumor_bams + .map { meta, bam, bai -> + return [ meta, bam, bai, meta.clairSTO_model] + } + .combine(pon_channel) + .set{ clairsto_input_ch} + CLAIRSTO ( + clairsto_input_ch, + fasta, + fai + ) + + + // SPLIT CLAIRSTO GERMLINE AND SOMATIC VARIATION + + CLAIRSTO.out.indel_vcf + .join(CLAIRSTO.out.snv_vcf) + .set{ clairsto_combined_vcf } + + VCFSPLIT ( + clairsto_combined_vcf + ) + + VCFSPLIT.out.germline_vcf + .join(VCFSPLIT.out.germline_tbi) + .map { meta, vcf, tbi -> + def new_meta = meta + [caller:'clairs-to'] + return [ new_meta, vcf, tbi] + } + .set{clairsto_germline_ch} + +
VCFSPLIT.out.somatic_vcf + .join(VCFSPLIT.out.somatic_tbi) + .map { meta, vcf, tbi -> + def new_meta = meta + [caller:'clairs-to'] + return [ new_meta, vcf, tbi] + } + .set{clairsto_somatic_ch} + } + // DEEPVARIANT (GERMLINE VARIANT CALLING) -- ONLY FEEDS THE GERMLINE BRANCH, SO GATED ON germline_var_keep + if(params.germline_var_keep != 'clair') { + tumor_bams + .map { meta, bam, bai -> + def intervals = [] + return [meta,bam,bai, intervals] + } + .set{deepvariant_input_ch} + + DEEPVARIANT ( + deepvariant_input_ch, + fasta, + fai, + [[:],[]], + [[:],[]] + ) + + + DEEPVARIANT.out.vcf + .join(DEEPVARIANT.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepvariant'] + return [new_meta, vcf, tbi] + } + .set{deepvariant_ch} + } + + // COMBINE GERMLINE VARIANTS (CONSENSUS ONLY WHEN NEITHER SINGLE CALLER IS SELECTED) + if (params.germline_var_keep != 'clair' && params.germline_var_keep != 'deepvariant' ) { + clairsto_germline_ch + .mix(deepvariant_ch) + .set{combined_germline_ch} + + GERMLINE_CONSENSUS( + combined_germline_ch, + fasta, + fai, + params.germline_var_keep + ) + GERMLINE_CONSENSUS.out.vcf + .join(GERMLINE_CONSENSUS.out.tbi) + .set{germline_vcf} + } + else if (params.germline_var_keep == 'clair') { + clairsto_germline_ch + .set{germline_vcf} + } + else if (params.germline_var_keep == 'deepvariant') { + deepvariant_ch + .set{germline_vcf} + } + // DEEPSOMATIC (SOMATIC VARIANT CALLING; NORMAL BAM/BAI ARE PASSED AS EMPTY LISTS) + if(params.somatic_var_keep != 'clair') { + tumor_bams + .map { meta, tumor_bam, tumor_bai -> + def normal_bam = [] + def normal_bai = [] + return [meta,normal_bam,normal_bai,tumor_bam,tumor_bai] + } + .set{deepsomatic_input_ch} + + DEEPSOMATIC ( + deepsomatic_input_ch, + [[:],[]], + fasta, + fai, + [[:],[]] + ) + DEEPSOMATIC.out.vcf + .join(DEEPSOMATIC.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepsomatic'] + return [new_meta, vcf, tbi] + } + .set{deepsomatic_ch} + } + // COMBINE SOMATIC VARIATION (CONSENSUS ONLY WHEN NEITHER SINGLE CALLER IS SELECTED) + if (params.somatic_var_keep != 'clair' && params.somatic_var_keep != 'deepvariant' ) { + clairsto_somatic_ch + .mix(deepsomatic_ch) + .set{combined_somatic_ch} + + SOMATIC_CONSENSUS( +
combined_somatic_ch, + fasta, + fai, + params.somatic_var_keep + ) + SOMATIC_CONSENSUS.out.vcf + .join(SOMATIC_CONSENSUS.out.tbi) + .set{somatic_vcf} + } + else if (params.somatic_var_keep == 'clair') { + clairsto_somatic_ch + .set{somatic_vcf} + } + else if (params.somatic_var_keep == 'deepvariant') { + deepsomatic_ch + .set{somatic_vcf} + } + + somatic_vcf + .map{ meta, vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{somatic_vcf} + + germline_vcf + .map{ meta, vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{germline_vcf} + emit: + somatic_vcf + germline_vcf + + +} \ No newline at end of file diff --git a/subworkflows/local/tumor_only_happhase.nf b/subworkflows/local/tumor_only_happhase.nf deleted file mode 100644 index e68d4c06..00000000 --- a/subworkflows/local/tumor_only_happhase.nf +++ /dev/null @@ -1,252 +0,0 @@ -include { CLAIRSTO } from '../../modules/local/clairsto/main.nf' -include { VCFSPLIT } from '../../modules/local/vcfsplit/main.nf' -include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main' -include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' -include { DEEPVARIANT } from '../../subworkflows/nf-core/deepvariant/main.nf' -include { DEEPSOMATIC } from '../../subworkflows/local/deepsomatic.nf' - -include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' -include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../subworkflows/local/small_variant_consensus.nf' - - -workflow TUMOR_ONLY_HAPPHASE { - - take: - tumor_bams - fasta - fai -
pon_channel - - main: - - ch_versions = channel.empty() - tumor_only_severus = channel.empty() - somatic_vep = channel.empty() - germline_vep = channel.empty() - - tumor_bams - .map{ meta, bam, bai -> - return [meta, bam, bai, meta.clairSTO_model] - } - .set{ tumor_bams } - // [meta, bam, bai, clairSTO_model] -- ClairS-TO model string appended for CLAIRSTO input - - tumor_bams - .map { meta, bam, bai, _clairSTO_model -> - def intervals = [] - return [meta,bam,bai, intervals] - } - .set{tumor_bams_deepvar} - - tumor_bams - .map { meta, tumor_bam, tumor_bai, _clairSTO_model -> - def normal_bam = [] - def normal_bai = [] - return [meta,normal_bam,normal_bai,tumor_bam,tumor_bai] - } - .set{tumor_bams_deepsomatic} - - DEEPVARIANT ( - tumor_bams_deepvar, - fasta, - fai, - [[:],[]], - [[:],[]] - ) - - DEEPSOMATIC ( - tumor_bams_deepsomatic, - [[:],[]], - fasta, - fai, - [[:],[]] - ) - - // - // MODULE: CLAIRSTO - // - // call somatic/non-somatic variants - // (* not called as germline * just non-somatic) - tumor_bams.combine(pon_channel).set{new_combine} - new_combine.view() - CLAIRSTO ( - new_combine, - fasta, - fai, - ) - - CLAIRSTO.out.indel_vcf - .join(CLAIRSTO.out.snv_vcf) - .set{ clairsto_vcf } - // [meta, indel_vcf, snv_vcf] -- raw ClairS-TO variant calls - - // - // MODULE: VCFSPLIT - // - // ClairSTO gives outputs in snv.vcf and indel.vcf - // reformats them to be in somatic.vcf and nonsomatic.vcf - - VCFSPLIT ( - clairsto_vcf - ) - - VCFSPLIT.out.germline_vcf - .join(VCFSPLIT.out.germline_tbi) - .map { meta, vcf, tbi -> - def new_meta = meta + [caller:'clairs-to'] - return [ new_meta, vcf, tbi] - } - .set{clairsto_germline_ch} - - DEEPVARIANT.out.vcf - .join(DEEPVARIANT.out.vcf_index) - .map{ meta, vcf, tbi -> - def new_meta = meta + [caller:'deepvariant'] - return [new_meta, vcf, tbi] - } - .set{deepvariant_ch} - - clairsto_germline_ch - .mix(deepvariant_ch) - .set{mixed_vcfs} - - GERMLINE_CONSENSUS( - mixed_vcfs, - fasta, - fai, - params.germline_var_keep - ) 
- - VCFSPLIT.out.somatic_vcf - .join(VCFSPLIT.out.somatic_tbi) - .map { meta, vcf, tbi -> - def new_meta = meta + [caller:'clairs-to'] - return [ new_meta, vcf, tbi] - } - .set{clairsto_somatic_ch} - - DEEPSOMATIC.out.vcf - .join(DEEPSOMATIC.out.vcf_index) - .map{ meta, vcf, tbi -> - def new_meta = meta + [caller:'deepsomatic'] - return [new_meta, vcf, tbi] - } - .set{deepsomatic_ch} - - clairsto_somatic_ch - .mix(deepsomatic_ch) - .set{mixed_somatic_vcfs} - - SOMATIC_CONSENSUS( - mixed_somatic_vcfs, - fasta, - fai, - params.somatic_var_keep - ) - // Add the nonsomatic vcf info - // remove model info - tumor_bams - .join(GERMLINE_CONSENSUS.out.vcf) - .map{ meta, bam, bai, _model, snps -> - def svs = [] - def mods = [] - return[meta, bam, bai, snps, svs, mods] - } - .set{ tumor_bams_germlinevcf } - // [meta, bam, bai, nonsomatic_vcf, [], []] -- non-somatic variants used for phasing; svs and mods are empty placeholders for LONGPHASE_PHASE input - - SOMATIC_CONSENSUS.out.vcf - .map { meta, vcf -> - def extra = [] - return [meta,vcf, extra] - } - .set { somatic_vep } - // [meta, somatic_vcf, []] -- PASS (somatic) variants for VEP annotation - - GERMLINE_CONSENSUS.out.vcf - .map { meta, vcf -> - def extra = [] - return [meta,vcf, extra] - } - .set { germline_vep } - // [meta, germline_vcf, []] -- non-somatic variants (relabelled PASS) for VEP annotation - - // - // MODULES: LONGPHASE_PHASE - // - // Phase tumor bams on nonsomatic vcf - LONGPHASE_PHASE ( - tumor_bams_germlinevcf, - fasta, - fai - ) - - ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) - - // Add phased nonsomatic vcf info - // remove model info - tumor_bams - .join(LONGPHASE_PHASE.out.snv_vcf) - .map { meta, bam, bai, _model, vcf -> - def new_meta = meta + [type: "tumor"] - def svs = [] - def mods = [] - return [new_meta, bam, bai, vcf, svs, mods] - } - .set{ tumor_bams_phasedvcf } - // [meta+{type:"tumor"}, bam, bai, phased_nonsomatic_vcf, [], []] -- type added; svs and mods are empty 
placeholders for LONGPHASE_HAPLOTAG - - // - // MODULES: LONGPHASE_HAPLOTAG - // - // Haplotag the tumor bams - - LONGPHASE_HAPLOTAG ( - tumor_bams_phasedvcf, - fasta, - fai - ) - - ch_versions = ch_versions.mix(LONGPHASE_HAPLOTAG.out.versions) - - // grab phased bams - LONGPHASE_HAPLOTAG.out.bam - .set{ haplotagged_bams } - // [meta+{type:"tumor"}, haplotagged_bam] - - // - // MODULES: SAMTOOLS_INDEX - // - // index the haplotagged bams - SAMTOOLS_INDEX ( - haplotagged_bams - ) - - // join information and the phased VCF file - haplotagged_bams - .join(SAMTOOLS_INDEX.out.bai) - .join(LONGPHASE_PHASE.out.snv_vcf) - .join(LONGPHASE_PHASE.out.snv_vcf_index) - .map{ meta, hap_bam, hap_bai, vcf, tbi -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - clair3_model: meta.clair3_model, - clairS_model: meta.clairS_model, - clairSTO_model: meta.clairSTO_model, - kinetics: meta.kinetics] - return [new_meta, hap_bam, hap_bai, [], [], vcf, tbi] - } - .set{ tumor_only_severus } - // [meta, hap_bam, hap_bai, [], [], phased_vcf, phased_tbi] -- normal_bam and normal_bai are [] (tumor-only mode) - - emit: - tumor_only_severus - somatic_vep - germline_vep - versions = ch_versions - -} diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index 6f296c9d..714b35b4 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -35,13 +35,15 @@ include { ENSEMBLVEP_VEP as SV_VEP } from '../modules/nf-core/ensemblve // // IMPORT SUBWORKFLOWS // -include { PREPARE_REFERENCE_FILES } from '../subworkflows/local/prepare_reference_files' -include { PREPARE_ANNOTATION } from '../subworkflows/local/prepare_annotation' -include { BAM_STATS_SAMTOOLS } from '../subworkflows/nf-core/bam_stats_samtools/main' -include { TUMOR_NORMAL_HAPPHASE } from '../subworkflows/local/tumor_normal_happhase' -include { TUMOR_ONLY_HAPPHASE } from '../subworkflows/local/tumor_only_happhase' - - +include { 
PREPARE_REFERENCE_FILES } from '../subworkflows/local/prepare_reference_files' +include { PREPARE_ANNOTATION } from '../subworkflows/local/prepare_annotation' +include { BAM_STATS_SAMTOOLS } from '../subworkflows/nf-core/bam_stats_samtools/main' +include { TUMOR_NORMAL_HAPPHASE } from '../subworkflows/local/tumor_normal_happhase' +include { TUMOR_ONLY_HAPPHASE } from '../subworkflows/local/tumor_only_happhase' +include { TUMORONLY_SMALLVAR } from '../subworkflows/local/tumor_only/tumoronly_smallvar' +include { PAIRED_SMALLVAR_SOMATIC } from '../subworkflows/local/paired/paired_smallvar_somatic' +include { PAIRED_SMALLVAR_GERMLINE } from '../subworkflows/local/paired/paired_smallvar_germline' +include { PHASING_HAPLOTYPING } from '../subworkflows/local/phasing_haplotyping' @@ -119,8 +121,6 @@ workflow LRSOMATIC { .of( tuple(pon_files, pon_flags) ) .set { pon_channel } - pon_channel.view() - ch_versions = channel.empty() ch_multiqc_files = channel.empty() @@ -159,9 +159,6 @@ workflow LRSOMATIC { [ meta, bam.flatten()] } .set{ch_samplesheet} - // [meta_full, [bam...]] -- meta now includes: id, paired_data, type, platform, sex, fiber, clair3_model, clairS_model, clairSTO_model, kinetics - - // // SUBWORKFLOW: PREPARE_REFERENCE_FILES @@ -400,44 +397,114 @@ workflow LRSOMATIC { ch_minimap_bam .join(MINIMAP2_ALIGN.out.index) + .set {ch_index_minimap} + + ch_index_minimap .branch { meta, _bams, _bais -> paired: meta.paired_data tumor_only: !meta.paired_data } .set { branched_minimap } + // branched_minimap.paired: [meta, bam, bai] -- one item per sample (tumor AND normal flow separately) // branched_minimap.tumor_only: [meta, bam, bai] - // - // SUBWORKFLOW: TUMOR_NORMAL_HAPPHASE - // - // Phasing/haplotaging/small germline variant calling for tumor-normal samples + TUMORONLY_SMALLVAR( + branched_minimap.tumor_only, + ch_fasta, + ch_fai, + pon_channel + ) + + branched_minimap.paired + .set{paired_ch} + + paired_ch + .branch { meta, _bams, _bais -> + normal: meta.type == 
"normal" + tumor: meta.type == "tumor" + } + .set{branched_paired_ch} + + branched_paired_ch.normal + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, bam, bai] + } + .set{paired_normal_bams} + + branched_paired_ch.tumor + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, bam, bai] + } + .join(paired_normal_bams) + .set { somatic_smallvar_input } - TUMOR_NORMAL_HAPPHASE ( - branched_minimap.paired, + PAIRED_SMALLVAR_SOMATIC ( + somatic_smallvar_input, + ch_fasta, + ch_fai + ) + PAIRED_SMALLVAR_GERMLINE ( + branched_paired_ch.normal, ch_fasta, ch_fai, downloaded_clair3_models ) + + PAIRED_SMALLVAR_GERMLINE.out.germline_vcf + .mix(TUMORONLY_SMALLVAR.out.germline_vcf) + .set{ch_germline_vcf} - ch_versions = ch_versions.mix(TUMOR_NORMAL_HAPPHASE.out.versions) - // - // SUBWORKFLOW: TUMOR_ONLY_HAPPHASE - // - // Phasing/haplotagging for tumor only samples + TUMORONLY_SMALLVAR.out.somatic_vcf + .mix(PAIRED_SMALLVAR_SOMATIC.out.somatic_vcf) + .set{ch_somatic_vcf} + ch_index_minimap.view() + ch_germline_vcf.view() - TUMOR_ONLY_HAPPHASE ( - branched_minimap.tumor_only, + PHASING_HAPLOTYPING ( + ch_index_minimap, + ch_germline_vcf, ch_fasta, - ch_fai, - pon_channel + ch_fai ) - germline_vep = TUMOR_NORMAL_HAPPHASE.out.germline_vep.mix(TUMOR_ONLY_HAPPHASE.out.germline_vep) - // [meta, vcf, []] -- germline variants merged from T/N and tumor-only paths - somatic_vep = TUMOR_NORMAL_HAPPHASE.out.somatic_vep.mix(TUMOR_ONLY_HAPPHASE.out.somatic_vep) + + ch_somatic_vcf + .map { meta, vcf, _tbi -> + def extra = [] + return [meta, vcf, extra] + } + .set { somatic_vep } + + ch_germline_vcf + .map { meta, vcf, _tbi -> + def extra = [] + return [meta, vcf, extra] + } + .set { germline_vep } + + 
/// figure out severus channel structure then test + // [meta, vcf, []] -- somatic variants merged from T/N and tumor-only paths if (!params.skip_vep) { @@ -483,23 +550,34 @@ workflow LRSOMATIC { ) } - ch_versions = ch_versions.mix(TUMOR_ONLY_HAPPHASE.out.versions) - - // Get Severus input channel - TUMOR_NORMAL_HAPPHASE.out.tumor_normal_severus - .mix(TUMOR_ONLY_HAPPHASE.out.tumor_only_severus) - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf, tbi -> - return [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf, tbi] + branched_minimap.tumor_only + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, bam, bai] } - .set { severus_reformat } - // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, phased_tbi] -- normal_bam/bai are [] for tumor-only - + .map{meta, tumor_bam, tumor_bai-> + def normal_bam = [] + def normal_bai = [] + return [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + } + .mix(somatic_smallvar_input) + .join(PHASING_HAPLOTYPING.out.phased_germline_vcf) + .set{severus_input} + // // MODULE: SEVERUS // SEVERUS ( - severus_reformat, + severus_input, [[:], params.bed_file, params.pon_file] ) From ad88f88c160fff5cb8e7e4bfb213f24ec5b3b832 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 09:19:26 +0100 Subject: [PATCH 16/36] somatic phasing --- conf/modules.config | 18 ++- log.txt | 0 subworkflows/local/phasing_haplotyping.nf | 113 ++++++++++++---- tests/default.nf.test.snap | 154 +++++++++++++++------- workflows/lrsomatic.nf | 6 +- 5 files changed, 213 insertions(+), 78 deletions(-) delete mode 100644 log.txt diff --git a/conf/modules.config b/conf/modules.config index 397e73aa..3b7bd7e9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -317,7 +317,21 @@ process { // Phasing processes // - withName: '.*:LONGPHASE_PHASE' { + withName: 
'.*:LONGPHASE_PHASE_GERMLINE' { + ext.prefix = { "germline_smallvariants" } + ext.args = { + [ + meta.platform == 'pb' ? '--pb' : '--ont', + "--indels", + ].join(' ').trim() + } + publishDir = [ + path: { "${params.outdir}/${meta.id}/variants/phased" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*:LONGPHASE_PHASE_SOMATIC' { ext.prefix = { "somatic_smallvariants" } ext.args = { [ @@ -331,7 +345,7 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } - withName: '.*:LONGPHASE_HAPLOTAG' { + withName: '.*:LONGPHASE_HAPLOTAG*' { ext.prefix = { "${meta.id}_${meta.type}" } publishDir = [ path: { "${params.outdir}/${meta.id}/bamfiles" }, diff --git a/log.txt b/log.txt deleted file mode 100644 index e69de29b..00000000 diff --git a/subworkflows/local/phasing_haplotyping.nf b/subworkflows/local/phasing_haplotyping.nf index 272b5ef0..b543e3cc 100644 --- a/subworkflows/local/phasing_haplotyping.nf +++ b/subworkflows/local/phasing_haplotyping.nf @@ -1,12 +1,19 @@ // Import modules -include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main.nf' -include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' -include { LONGPHASE_MODCALL } from '../../modules/local/longphase/modcall/main.nf' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' +include { LONGPHASE_PHASE as LONGPHASE_PHASE_GERMLINE } from '../../modules/nf-core/longphase/phase/main.nf' +include { LONGPHASE_PHASE as LONGPHASE_PHASE_SOMATIC } from '../../modules/nf-core/longphase/phase/main.nf' +include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' +include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_GERMLINE } from '../../modules/local/longphase/modcall/main.nf' +include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_SOMATIC } from '../../modules/local/longphase/modcall/main.nf' +include { 
SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' +include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main' + + workflow PHASING_HAPLOTYPING { take: tumor_normal_bams // [meta, bam, bai] germline_vcf + somatic_vcf fasta fai @@ -77,14 +84,23 @@ workflow PHASING_HAPLOTYPING { tumor_only_ch .mix(paired_normal_ch) - .set { longphase_modcall_input_ch } + .set { normal_bams_w_tumoronly_ch } + tumor_only_ch + .mix(paired_tumor_ch) + .set{ tumor_bams_ch} // MODCALL if (!params.skip_modcall) { - LONGPHASE_MODCALL ( - longphase_modcall_input_ch, + LONGPHASE_MODCALL_GERMLINE ( + normal_bams_w_tumoronly_ch, + fasta, + fai + ) + + LONGPHASE_MODCALL_SOMATIC ( + tumor_bams_ch, fasta, fai ) @@ -92,33 +108,78 @@ workflow PHASING_HAPLOTYPING { } // PHASING if (!params.skip_modcall) { - longphase_modcall_input_ch + normal_bams_w_tumoronly_ch .join(germline_vcf) - .join(LONGPHASE_MODCALL.out.mod_vcf) + .join(LONGPHASE_MODCALL_GERMLINE.out.mod_vcf) .map { meta, bam, bai, vcf, _tbi, mods-> def svs = [] return [ meta, bam, bai, vcf, svs, mods ] } - .set{ longphase_phase_input_ch } + .set{ longphase_phase_germline_input_ch } + + germline_vcf + .join(somatic_vcf) + .map { meta, germline_vcf, germline_tbi, somatic_vcf, somatic_tbi -> + def vcfs = [somatic_vcf, germline_vcf] + def tbis = [somatic_tbi, germline_tbi] + return [ meta, vcfs, tbis] + } + .set{germline_somatic_vcfs} + BCFTOOLS_CONCAT(germline_somatic_vcfs) + BCFTOOLS_CONCAT.out.vcf + .set{concat_out} + BCFTOOLS_SORT(concat_out) + BCFTOOLS_SORT.out.vcf + .set{germline_somatic_vcfs} + + tumor_bams_ch + .join(germline_somatic_vcfs) + .join(LONGPHASE_MODCALL_SOMATIC.out.mod_vcf) + .map { meta, bam, bai, vcf, mods-> + def svs = [] + return [ meta, bam, bai, vcf, svs, mods ] + } + .set{ longphase_phase_somatic_input_ch } } else { - longphase_modcall_input_ch + normal_bams_w_tumoronly_ch .join(germline_vcf) - .map { meta, 
bam, bai, vcf, _tbi-> + .map { meta, bam, bai, vcf, _tbi -> def svs = [] def mods = [] return [ meta, bam, bai, vcf, svs, mods ] } - .set{ longphase_phase_input_ch } + .set{ longphase_phase_germline_input_ch } + + tumor_bams_ch + .join(germline_somatic_vcfs) + .join(LONGPHASE_MODCALL_SOMATIC.out.mod_vcf) + .map { meta, bam, bai, vcf -> + def svs = [] + def mods = [] + return [ meta, bam, bai, vcf, svs, mods ] + } + .set{ longphase_phase_somatic_input_ch } } - LONGPHASE_PHASE ( - longphase_phase_input_ch, + + LONGPHASE_PHASE_GERMLINE ( + longphase_phase_germline_input_ch, fasta, fai ) - LONGPHASE_PHASE.out.snv_vcf - .join(LONGPHASE_PHASE.out.snv_vcf_index) + LONGPHASE_PHASE_GERMLINE.out.snv_vcf + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf_index) + .set{ phased_germline_vcf } + + LONGPHASE_PHASE_SOMATIC ( + longphase_phase_somatic_input_ch, + fasta, + fai + ) + + LONGPHASE_PHASE_SOMATIC.out.snv_vcf + .join(LONGPHASE_PHASE_SOMATIC.out.snv_vcf_index) .set{ phased_germline_vcf } // HAPLOTAGING @@ -127,7 +188,7 @@ workflow PHASING_HAPLOTYPING { if(!params.skip_modcall) { - LONGPHASE_MODCALL.out.mod_vcf + LONGPHASE_MODCALL_GERMLINE.out.mod_vcf .map { meta, mods -> def new_meta = meta.subMap('id', 'paired_data', @@ -143,7 +204,7 @@ workflow PHASING_HAPLOTYPING { .set{modcall_vcf_ch} tumor_only_ch - .join(LONGPHASE_PHASE.out.snv_vcf) + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .join(modcall_vcf_ch) .map { meta, bam, bai, vcf, mods -> def new_meta = meta + [type : "tumor"] @@ -153,7 +214,7 @@ workflow PHASING_HAPLOTYPING { .set{ tumor_only_ch } paired_tumor_ch - .join(LONGPHASE_PHASE.out.snv_vcf) + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .join(modcall_vcf_ch) .map { meta, bam, bai, vcf, mods -> def new_meta = meta + [type : "tumor"] @@ -163,7 +224,7 @@ workflow PHASING_HAPLOTYPING { .set{ paired_tumor_ch } paired_normal_ch - .join(LONGPHASE_PHASE.out.snv_vcf) + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .join(modcall_vcf_ch) .map { meta, bam, bai, vcf, mods -> def 
new_meta = meta + [type : "normal"] @@ -176,7 +237,7 @@ workflow PHASING_HAPLOTYPING { else { tumor_only_ch - .join(LONGPHASE_PHASE.out.snv_vcf) + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .map { meta, bam, bai, vcf -> def new_meta = meta + [type : "tumor"] def svs = [] @@ -186,7 +247,7 @@ workflow PHASING_HAPLOTYPING { .set{ tumor_only_ch } paired_tumor_ch - .join(LONGPHASE_PHASE.out.snv_vcf) + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .map { meta, bam, bai, vcf -> def new_meta = meta + [type : "tumor"] def svs = [] @@ -196,7 +257,7 @@ workflow PHASING_HAPLOTYPING { .set{ paired_tumor_ch } paired_normal_ch - .join(LONGPHASE_PHASE.out.snv_vcf) + .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .map { meta, bam, bai, vcf -> def new_meta = meta + [type : "normal"] def svs = [] @@ -208,8 +269,8 @@ workflow PHASING_HAPLOTYPING { } tumor_only_ch - .join(paired_tumor_ch) - .join(paired_normal_ch) + .mix(paired_tumor_ch) + .mix(paired_normal_ch) .set {longphase_haplotag_input_ch} LONGPHASE_HAPLOTAG ( diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index bd36bc15..778a5df1 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -2,9 +2,21 @@ "-profile test": { "content": [ { + "BCFTOOLS_ANNOTATE": { + "bcftools": 1.22 + }, "BCFTOOLS_CONCAT": { "bcftools": 1.22 }, + "BCFTOOLS_ISEC": { + "bcftools": 1.22 + }, + "BCFTOOLS_NORM": { + "bcftools": 1.22 + }, + "BCFTOOLS_QUERY": { + "bcftools": 1.22 + }, "BCFTOOLS_SORT": { "bcftools": 1.22 }, @@ -23,18 +35,29 @@ "CRAMINO_PRE": { "cramino": "1.3.0" }, + "DEEPSOMATIC_CALLVARIANTS": { + "deepsomatic": "1.7.0" + }, + "DEEPSOMATIC_MAKEEXAMPLES": { + "deepsomatic": "1.7.0" + }, + "DEEPSOMATIC_POSTPROCESSVARIANTS": { + "deepsomatic": "1.7.0" + }, + "DEEPVARIANT_CALLVARIANTS": { + "deepvariant": "1.9.0" + }, + "DEEPVARIANT_MAKEEXAMPLES": { + "deepvariant": "1.9.0" + }, + "DEEPVARIANT_POSTPROCESSVARIANTS": { + "deepvariant": "1.9.0" + }, "GERMLINE_VEP": { "ensemblvep": 115.2, "perl-math-cdf": 0.1, 
"tabix": 1.21 }, - "LONGPHASE_HAPLOTAG": { - "longphase": 2.0 - }, - "LONGPHASE_PHASE": { - "longphase": 2.0, - "tabix": "1.22.1" - }, "METAEXTRACT": { "samtools": 1.21 }, @@ -53,9 +76,6 @@ "SAMTOOLS_IDXSTATS": { "samtools": "1.22.1" }, - "SAMTOOLS_INDEX": { - "samtools": "1.22.1" - }, "SAMTOOLS_STATS": { "samtools": "1.22.1" }, @@ -89,6 +109,10 @@ } }, [ + "longphase", + "longphase/sample1.vcf", + "longphase/sample2.vcf", + "longphase/sample3.vcf", "multiqc", "multiqc/multiqc_data", "multiqc/multiqc_data/llms-full.txt", @@ -164,11 +188,6 @@ "pipeline_info", "pipeline_info/lrsomatic_software_mqc_versions.yml", "sample1", - "sample1/bamfiles", - "sample1/bamfiles/sample1_normal.bam", - "sample1/bamfiles/sample1_normal.bam.bai", - "sample1/bamfiles/sample1_tumor.bam", - "sample1/bamfiles/sample1_tumor.bam.bai", "sample1/qc", "sample1/qc/normal", "sample1/qc/normal/cramino_aln", @@ -241,9 +260,21 @@ "sample1/variants/clairs/indel.vcf.gz.tbi", "sample1/variants/clairs/snvs.vcf.gz", "sample1/variants/clairs/snvs.vcf.gz.tbi", + "sample1/variants/deepsomatic", + "sample1/variants/deepsomatic/sample1.g.vcf.gz", + "sample1/variants/deepsomatic/sample1.g.vcf.gz.tbi", + "sample1/variants/deepsomatic/sample1.vcf.gz", + "sample1/variants/deepsomatic/sample1.vcf.gz.tbi", + "sample1/variants/deepvariant", + "sample1/variants/deepvariant/sample1.g.vcf.gz", + "sample1/variants/deepvariant/sample1.g.vcf.gz.tbi", + "sample1/variants/deepvariant/sample1.vcf.gz", + "sample1/variants/deepvariant/sample1.vcf.gz.tbi", "sample1/variants/phased", "sample1/variants/phased/somatic_smallvariants.vcf.gz", "sample1/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample1/variants/severus", "sample1/variants/severus/all_SVs", "sample1/variants/severus/all_SVs/severus_all.vcf.gz", @@ -267,11 +298,6 @@ "sample1/vep/somatic/sample1_SOMATIC_VEP.vcf.gz.tbi", 
"sample1/vep/somatic/sample1_SOMATIC_VEP.vcf.gz_summary.html", "sample2", - "sample2/bamfiles", - "sample2/bamfiles/sample2_normal.bam", - "sample2/bamfiles/sample2_normal.bam.bai", - "sample2/bamfiles/sample2_tumor.bam", - "sample2/bamfiles/sample2_tumor.bam.bai", "sample2/qc", "sample2/qc/normal", "sample2/qc/normal/cramino_aln", @@ -344,9 +370,21 @@ "sample2/variants/clairs/indel.vcf.gz.tbi", "sample2/variants/clairs/snvs.vcf.gz", "sample2/variants/clairs/snvs.vcf.gz.tbi", + "sample2/variants/deepsomatic", + "sample2/variants/deepsomatic/sample2.g.vcf.gz", + "sample2/variants/deepsomatic/sample2.g.vcf.gz.tbi", + "sample2/variants/deepsomatic/sample2.vcf.gz", + "sample2/variants/deepsomatic/sample2.vcf.gz.tbi", + "sample2/variants/deepvariant", + "sample2/variants/deepvariant/sample2.g.vcf.gz", + "sample2/variants/deepvariant/sample2.g.vcf.gz.tbi", + "sample2/variants/deepvariant/sample2.vcf.gz", + "sample2/variants/deepvariant/sample2.vcf.gz.tbi", "sample2/variants/phased", "sample2/variants/phased/somatic_smallvariants.vcf.gz", "sample2/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample2/variants/severus", "sample2/variants/severus/all_SVs", "sample2/variants/severus/all_SVs/severus_all.vcf.gz", @@ -370,9 +408,6 @@ "sample2/vep/somatic/sample2_SOMATIC_VEP.vcf.gz.tbi", "sample2/vep/somatic/sample2_SOMATIC_VEP.vcf.gz_summary.html", "sample3", - "sample3/bamfiles", - "sample3/bamfiles/sample3_tumor.bam", - "sample3/bamfiles/sample3_tumor.bam.bai", "sample3/qc", "sample3/qc/tumor", "sample3/qc/tumor/cramino_aln", @@ -415,9 +450,21 @@ "sample3/variants/clairsto/snv.vcf.gz.tbi", "sample3/variants/clairsto/somatic.vcf.gz", "sample3/variants/clairsto/somatic.vcf.gz.tbi", + "sample3/variants/deepsomatic", + "sample3/variants/deepsomatic/sample3.g.vcf.gz", + "sample3/variants/deepsomatic/sample3.g.vcf.gz.tbi", + 
"sample3/variants/deepsomatic/sample3.vcf.gz", + "sample3/variants/deepsomatic/sample3.vcf.gz.tbi", + "sample3/variants/deepvariant", + "sample3/variants/deepvariant/sample3.g.vcf.gz", + "sample3/variants/deepvariant/sample3.g.vcf.gz.tbi", + "sample3/variants/deepvariant/sample3.vcf.gz", + "sample3/variants/deepvariant/sample3.vcf.gz.tbi", "sample3/variants/phased", "sample3/variants/phased/somatic_smallvariants.vcf.gz", "sample3/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample3/variants/severus", "sample3/variants/severus/all_SVs", "sample3/variants/severus/all_SVs/severus_all.vcf.gz", @@ -442,43 +489,60 @@ "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz_summary.html" ], [ - "sample1_normal.bam:md5,58854f8d427538288322ac20df574fe1", - "sample1_normal.bam.bai:md5,398a33445ef807ebc83c851c3f3c0df6", - "sample1_tumor.bam:md5,0e2ebc65d456e9aa614f5e4714ea2f97", - "sample1_tumor.bam.bai:md5,b92667a80d721b8d515ad099e4f97925", + "sample1.vcf:md5,26cef7c7c05a6d2e076aeb0e0bef7fe2", + "sample2.vcf:md5,e50e314896682852a973f1f9236f908c", + "sample3.vcf:md5,009e8d6ef6736c8f7760d3cbbbf91f73", "sample1.flagstat:md5,1c41ea9923945501eb7e41f83a90502d", "sample1.idxstats:md5,902e503387799123ea59255e3fca172c", - "sample1.stats:md5,5a76f92088d36f8e93d72351e521b59b", + "sample1.stats:md5,70fabbdc07dec0479b3fc7dcec344054", "sample1.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample1.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", - "sample1.stats:md5,8cec99bd9c1ba4ee22619b66d4fec02a", - "breakpoints_double.csv:md5,fd92fe40bc0ab3b836dedc395b80d6e2", + "sample1.stats:md5,5012c82d3d3ca60ffdd2fb970f772566", + "sample1.g.vcf.gz:md5,e9c0e1c7f90d334faccab6b854611643", + "sample1.g.vcf.gz.tbi:md5,6110e6f6ab72dbc0bee604afe690893f", + "sample1.vcf.gz:md5,92c0fa9016c3d8b192eb382fd6a81199", + "sample1.vcf.gz.tbi:md5,04e82577dc57f80b9db25897389364b2", + 
"sample1.g.vcf.gz:md5,5ed06f35ccecf7aadbec54873dc07e64", + "sample1.g.vcf.gz.tbi:md5,ca628ef368d34a7a6c77098a4c4bdf36", + "sample1.vcf.gz:md5,ad971a535d2b8014fabdabe72995a5db", + "sample1.vcf.gz.tbi:md5,65de5caace8d4312afa707c3bfd6fa45", + "breakpoints_double.csv:md5,d63f058075bfe791248954ca2ee6c4fb", "read_qual.txt:md5,78247dfa2ea336eac0e128eba5e9eef4", - "sample2_normal.bam:md5,c9b1ac8d2faec01ecb178f0b351af69c", - "sample2_normal.bam.bai:md5,b8f81d2703b1d06128e23b860a9fd635", - "sample2_tumor.bam:md5,2ba9456992d944b63d63e067d5e3bc56", - "sample2_tumor.bam.bai:md5,8342f0fbbc8ce4f4c79aa42c7804df9d", "sample2.flagstat:md5,714d0cc0c213e2640e54a16f3d0e6e7e", "sample2.idxstats:md5,72eb83bb11748dc863fef1a0a5497e4b", - "sample2.stats:md5,9afe66549bed997cce97dfa11c0f0cca", + "sample2.stats:md5,87cb6e9adf8a133244e8b331be43bb14", "sample2.flagstat:md5,4344a8745efef9cc2a017024218d61c6", "sample2.idxstats:md5,69467fc02c83a30084736aeea8b785fb", - "sample2.stats:md5,7cbbff1faaf2e030470a8c1e69434b48", - "breakpoints_double.csv:md5,d3f0957887406fb79f9dcc3707324d8c", + "sample2.stats:md5,1e044857eeefb284fda88ee58ff7a04a", + "sample2.g.vcf.gz:md5,1cb2d7bf929e4e87a8591c11bf5034c7", + "sample2.g.vcf.gz.tbi:md5,67674d00b976135fe29f318067a01f7a", + "sample2.vcf.gz:md5,f9d6266ee49c2544dc28eef0cdd9dcae", + "sample2.vcf.gz.tbi:md5,433ecd219947787d00dd0fada6307661", + "sample2.g.vcf.gz:md5,7998d15ce582ec94e86568e3fae654f9", + "sample2.g.vcf.gz.tbi:md5,9498e3266900eee2448ff05b0bce87c0", + "sample2.vcf.gz:md5,33330d8444a1774864883b33e1e2235c", + "sample2.vcf.gz.tbi:md5,384b8b5ea7c1559b464031f8e0863532", + "breakpoints_double.csv:md5,a68d7fc9c7a7cb2f31e73189c5412f7b", "read_qual.txt:md5,8b92ff7dc4536188be159b95525511cd", - "sample3_tumor.bam:md5,7107cfc84eafca8f1ae918e775111090", - "sample3_tumor.bam.bai:md5,d2855691846361e01999895250c835e4", "sample3.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample3.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", - 
"sample3.stats:md5,6825d4e497aef80ed7160afbef5076d9", - "breakpoints_double.csv:md5,41bb00e81dd6c319c13e754fa853ca68", + "sample3.stats:md5,d7a8552a8a41a217954a0c825d468a60", + "sample3.g.vcf.gz:md5,2392430290a5d6a9a43b359faebc025b", + "sample3.g.vcf.gz.tbi:md5,4349b51070a4274707b94ef9b4d8c0bd", + "sample3.vcf.gz:md5,ee4f7a5bbd471c1370accf888b8262c0", + "sample3.vcf.gz.tbi:md5,944349ec46ecc95bab7db2f6848d0c36", + "sample3.g.vcf.gz:md5,64adc407ef2434254e76763b72c67b08", + "sample3.g.vcf.gz.tbi:md5,ac0397f7a840cf618d56f097defba878", + "sample3.vcf.gz:md5,9c52a803d796157a08b565c653d4685c", + "sample3.vcf.gz.tbi:md5,f0b3599e4281a6f3bbb8bc408740e58e", + "breakpoints_double.csv:md5,46e03bf2d67aa736b599f00fe7f01e06", "read_qual.txt:md5,b918430d35354dad1d7f02f21e4cd4ed" ] ], - "timestamp": "2026-03-13T14:26:30.747596368", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.0" - } + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-23T16:14:08.943804849" } } \ No newline at end of file diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index 714b35b4..add2d641 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -38,8 +38,6 @@ include { ENSEMBLVEP_VEP as SV_VEP } from '../modules/nf-core/ensemblve include { PREPARE_REFERENCE_FILES } from '../subworkflows/local/prepare_reference_files' include { PREPARE_ANNOTATION } from '../subworkflows/local/prepare_annotation' include { BAM_STATS_SAMTOOLS } from '../subworkflows/nf-core/bam_stats_samtools/main' -include { TUMOR_NORMAL_HAPPHASE } from '../subworkflows/local/tumor_normal_happhase' -include { TUMOR_ONLY_HAPPHASE } from '../subworkflows/local/tumor_only_happhase' include { TUMORONLY_SMALLVAR } from '../subworkflows/local/tumor_only/tumoronly_smallvar' include { PAIRED_SMALLVAR_SOMATIC } from '../subworkflows/local/paired/paired_smallvar_somatic' include { PAIRED_SMALLVAR_GERMLINE } from '../subworkflows/local/paired/paired_smallvar_germline' @@ -478,12 +476,10 @@ workflow 
LRSOMATIC { .mix(PAIRED_SMALLVAR_SOMATIC.out.somatic_vcf) .set{ch_somatic_vcf} - ch_index_minimap.view() - ch_germline_vcf.view() - PHASING_HAPLOTYPING ( ch_index_minimap, ch_germline_vcf, + ch_somatic_vcf, ch_fasta, ch_fai ) From 7e414e01403eef4b7db03059faf3db0a310db266 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 12:50:41 +0100 Subject: [PATCH 17/36] gpu optional flag, and snap updates --- .gitignore | 2 +- conf/base.config | 8 +- conf/modules.config | 21 +++-- modules.json | 26 ++++-- modules/local/clair3/main.nf | 2 +- .../local/deepsomatic/callvariants/main.nf | 2 +- modules/local/fibertoolsrs/fire/main.nf | 2 +- .../local/fibertoolsrs/nucleosomes/main.nf | 2 +- modules/local/fibertoolsrs/predictm6a/main.nf | 2 +- .../nf-core/bcftools/isec/bcftools-isec.diff | 36 ++++++++ .../bcftools/merge/bcftools-merge.diff | 21 +++++ .../bcftools/query/bcftools-query.diff | 30 +++++++ .../deepvariant-callvariants.diff | 18 ++++ .../nf-core/deepvariant/callvariants/main.nf | 2 +- .../deepvariant-makeexamples.diff | 18 ++++ .../deepvariant-postprocessvariants.diff | 18 ++++ modules/nf-core/severus/main.nf | 8 +- nextflow.config | 1 + nextflow_schema.json | 7 +- ro-crate-metadata.json | 2 +- subworkflows/local/deepsomatic.nf | 2 +- .../local/paired/paired_smallvar_germline.nf | 2 +- .../local/paired/paired_smallvar_somatic.nf | 6 +- subworkflows/local/phasing_haplotyping.nf | 56 ++++++------ subworkflows/local/small_variant_consensus.nf | 12 +-- .../local/tumor_only/tumoronly_smallvar.nf | 10 +-- .../nf-core/deepvariant/deepvariant.diff | 23 +++++ tests/default.nf.test.snap | 85 ++++++++++++++----- workflows/lrsomatic.nf | 16 ++-- 29 files changed, 333 insertions(+), 107 deletions(-) create mode 100644 modules/nf-core/bcftools/isec/bcftools-isec.diff create mode 100644 modules/nf-core/bcftools/merge/bcftools-merge.diff create mode 100644 modules/nf-core/bcftools/query/bcftools-query.diff create mode 100644 
modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff create mode 100644 modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff create mode 100644 modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff create mode 100644 subworkflows/nf-core/deepvariant/deepvariant.diff diff --git a/.gitignore b/.gitignore index 8be7152c..c5c144e3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,4 +9,4 @@ testing* null/ .nf-test .nf-test.log -out/ \ No newline at end of file +out/ diff --git a/conf/base.config b/conf/base.config index 79e7c3ee..cd23b577 100644 --- a/conf/base.config +++ b/conf/base.config @@ -27,8 +27,8 @@ process { // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors withLabel: process_gpu { - ext.use_gpu = { workflow.profile.contains('gpu') } - accelerator = { workflow.profile.contains('gpu') ? 1 : null } + ext.use_gpu = { params.use_gpu as boolean } + accelerator = { (params.use_gpu as boolean) ? 1 : null } } withLabel:process_single { cpus = { 1 } @@ -68,8 +68,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withLabel: process_gpu { - ext.use_gpu = { workflow.profile.contains('gpu') } - accelerator = { workflow.profile.contains('gpu') ? 1 : null } - } } diff --git a/conf/modules.config b/conf/modules.config index 3b7bd7e9..d9dac1fa 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -195,17 +195,19 @@ process { publishDir = [ path: { "${params.outdir}/${meta.id}/variants/deepsomatic" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> + (filename.equals('versions.yml') || filename.endsWith('.g.vcf.gz') || filename.endsWith('.g.vcf.gz.tbi')) ? null : filename + } ] } withName: '.*DEEPSOMATIC_CALLVARIANTS' { ext.args = { - meta.platform == 'pb' - ? (meta.paired_data - ? 
"--checkpoint '/opt/models/deepsomatic/pacbio'" - : "--checkpoint '/opt/models/deepsomatic/pacbio_tumor_only'" ) - : (meta.paired_data + meta.platform == 'pb' + ? (meta.paired_data + ? "--checkpoint '/opt/models/deepsomatic/pacbio'" + : "--checkpoint '/opt/models/deepsomatic/pacbio_tumor_only'" ) + : (meta.paired_data ? "--checkpoint '/opt/models/deepsomatic/ont'" : "--checkpoint '/opt/models/deepsomatic/ont_tumor_only'") } @@ -213,7 +215,7 @@ process { enabled : false ] } - + withName: '.*:UNZIP_.*' { publishDir = [ @@ -353,6 +355,11 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: '.*:LONGPHASE_MODCALL.*' { + publishDir = [ + enabled: false + ] + } // // Structural variant calling processes diff --git a/modules.json b/modules.json index ce434e66..c0168555 100644 --- a/modules.json +++ b/modules.json @@ -24,12 +24,14 @@ "bcftools/isec": { "branch": "master", "git_sha": "3b2c3559699a7bca6a7c2b220695a072e030e17d", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/isec/bcftools-isec.diff" }, "bcftools/merge": { "branch": "master", "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/merge/bcftools-merge.diff" }, "bcftools/norm": { "branch": "master", @@ -39,7 +41,8 @@ "bcftools/query": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/query/bcftools-query.diff" }, "bcftools/sort": { "branch": "master", @@ -50,17 +53,20 @@ "deepvariant/callvariants": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": ["deepvariant"] + "installed_by": ["deepvariant"], + "patch": "modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff" }, "deepvariant/makeexamples": { "branch": "master", "git_sha": 
"f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": ["deepvariant"] + "installed_by": ["deepvariant"], + "patch": "modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff" }, "deepvariant/postprocessvariants": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": ["deepvariant"] + "installed_by": ["deepvariant"], + "patch": "modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff" }, "ensemblvep/download": { "branch": "master", @@ -110,6 +116,11 @@ "git_sha": "2c73cc8fa92cf48de3da0b643fdf357a8a290b36", "installed_by": ["modules"] }, + "nanoplot": { + "branch": "master", + "git_sha": "682f789f93070bd047868300dd018faf3d434e7c", + "installed_by": ["modules"] + }, "pigz/uncompress": { "branch": "master", "git_sha": "f84336b7fa91a65aa61d215b8c109fbb8e4b4ac6", @@ -179,7 +190,8 @@ "deepvariant": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": ["subworkflows"] + "installed_by": ["subworkflows"], + "patch": "subworkflows/nf-core/deepvariant/deepvariant.diff" }, "utils_nextflow_pipeline": { "branch": "master", diff --git a/modules/local/clair3/main.nf b/modules/local/clair3/main.nf index ab398bab..f53b8b58 100644 --- a/modules/local/clair3/main.nf +++ b/modules/local/clair3/main.nf @@ -1,7 +1,7 @@ process CLAIR3 { tag "$meta.id" label 'process_very_high' - label 'process_gpu' + label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/local/deepsomatic/callvariants/main.nf b/modules/local/deepsomatic/callvariants/main.nf index f796de51..4906b954 100644 --- a/modules/local/deepsomatic/callvariants/main.nf +++ b/modules/local/deepsomatic/callvariants/main.nf @@ -1,7 +1,7 @@ process DEEPSOMATIC_CALLVARIANTS { tag "$meta.id" label 'process_high' - label 'process_gpu' + label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" //Conda is not supported at the moment container "docker.io/google/deepsomatic:1.7.0" diff --git a/modules/local/fibertoolsrs/fire/main.nf b/modules/local/fibertoolsrs/fire/main.nf index 2d84e7e0..e78bf544 100644 --- a/modules/local/fibertoolsrs/fire/main.nf +++ b/modules/local/fibertoolsrs/fire/main.nf @@ -2,7 +2,7 @@ process FIBERTOOLSRS_FIRE { tag "$meta.id" label 'process_very_high' label 'process_high_memory' - label 'process_gpu' + label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/fibertoolsrs/nucleosomes/main.nf b/modules/local/fibertoolsrs/nucleosomes/main.nf index 7462cd68..2357d638 100644 --- a/modules/local/fibertoolsrs/nucleosomes/main.nf +++ b/modules/local/fibertoolsrs/nucleosomes/main.nf @@ -2,7 +2,7 @@ process FIBERTOOLSRS_NUCLEOSOMES { tag "$meta.id" label 'process_very_high' label 'process_high_memory' - label 'process_gpu' + label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/local/fibertoolsrs/predictm6a/main.nf b/modules/local/fibertoolsrs/predictm6a/main.nf index 0dabcd20..0ac25676 100644 --- a/modules/local/fibertoolsrs/predictm6a/main.nf +++ b/modules/local/fibertoolsrs/predictm6a/main.nf @@ -2,7 +2,7 @@ process FIBERTOOLSRS_PREDICTM6A { tag "$meta.id" label 'process_very_high' label 'process_high_memory' - label 'process_gpu' + label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/nf-core/bcftools/isec/bcftools-isec.diff b/modules/nf-core/bcftools/isec/bcftools-isec.diff new file mode 100644 index 00000000..36911c50 --- /dev/null +++ b/modules/nf-core/bcftools/isec/bcftools-isec.diff @@ -0,0 +1,36 @@ +Changes in component 'nf-core/bcftools/isec' +'modules/nf-core/bcftools/isec/meta.yml' is unchanged +Changes in 'bcftools/isec/main.nf': +--- modules/nf-core/bcftools/isec/main.nf ++++ modules/nf-core/bcftools/isec/main.nf +@@ -12,6 +12,15 @@ + + output: + tuple val(meta), path("${prefix}", type: "dir"), emit: results ++ tuple val(meta), path("${prefix}/0002.vcf.gz"), emit: deepvar_consensus_vcf ++ tuple val(meta), path("${prefix}/0002.vcf.gz.tbi"), emit: deepvar_consensus_tbi ++ tuple val(meta), path("${prefix}/0003.vcf.gz"), emit: clair_consensus_vcf ++ tuple val(meta), path("${prefix}/0003.vcf.gz.tbi"), emit: clair_consensus_tbi ++ tuple val(meta), path("${prefix}/0001.vcf.gz"), emit: clair_private_vcf ++ tuple val(meta), path("${prefix}/0001.vcf.gz.tbi"), emit: clair_private_tbi ++ tuple val(meta), path("${prefix}/0000.vcf.gz"), emit: deepvar_private_vcf ++ tuple val(meta), path("${prefix}/0000.vcf.gz.tbi"), emit: deepvar_private_tbi ++ + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: +@@ -30,7 +39,7 @@ + ${targets_file_args} \\ + 
${regions_file_args} \\ + -p ${prefix} \\ +- ${vcf_files} \\ ++ ${vcf_files} + """ + + stub: + +'modules/nf-core/bcftools/isec/environment.yml' is unchanged +'modules/nf-core/bcftools/isec/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/isec/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/isec/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/merge/bcftools-merge.diff b/modules/nf-core/bcftools/merge/bcftools-merge.diff new file mode 100644 index 00000000..6a8812db --- /dev/null +++ b/modules/nf-core/bcftools/merge/bcftools-merge.diff @@ -0,0 +1,21 @@ +Changes in component 'nf-core/bcftools/merge' +'modules/nf-core/bcftools/merge/meta.yml' is unchanged +Changes in 'bcftools/merge/main.nf': +--- modules/nf-core/bcftools/merge/main.nf ++++ modules/nf-core/bcftools/merge/main.nf +@@ -9,7 +9,8 @@ + + input: + tuple val(meta), path(vcfs), path(tbis), path(bed) +- tuple val(meta2), path(fasta), path(fai) ++ tuple val(meta2), path(fasta) ++ tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: vcf + +'modules/nf-core/bcftools/merge/environment.yml' is unchanged +'modules/nf-core/bcftools/merge/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/merge/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/merge/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/query/bcftools-query.diff b/modules/nf-core/bcftools/query/bcftools-query.diff new file mode 100644 index 00000000..12ebaeaa --- /dev/null +++ b/modules/nf-core/bcftools/query/bcftools-query.diff @@ -0,0 +1,30 @@ +Changes in component 'nf-core/bcftools/query' +'modules/nf-core/bcftools/query/meta.yml' is unchanged +Changes in 'bcftools/query/main.nf': +--- modules/nf-core/bcftools/query/main.nf ++++ modules/nf-core/bcftools/query/main.nf +@@ -14,7 +14,8 @@ + path 
samples + + output: +- tuple val(meta), path("*.${suffix}"), emit: output ++ tuple val(meta), path("*.${suffix}.gz"), emit: output ++ tuple val(meta), path("*.${suffix}.gz.tbi"), emit: index + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: +@@ -35,6 +36,8 @@ + ${args} \\ + ${vcf} \\ + > ${prefix}.${suffix} ++ bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz ++ tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz + """ + + stub: + +'modules/nf-core/bcftools/query/environment.yml' is unchanged +'modules/nf-core/bcftools/query/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/query/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/query/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff b/modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff new file mode 100644 index 00000000..e4aa7b97 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff @@ -0,0 +1,18 @@ +Changes in component 'nf-core/deepvariant/callvariants' +'modules/nf-core/deepvariant/callvariants/meta.yml' is unchanged +Changes in 'deepvariant/callvariants/main.nf': +--- modules/nf-core/deepvariant/callvariants/main.nf ++++ modules/nf-core/deepvariant/callvariants/main.nf +@@ -2,6 +2,7 @@ + process DEEPVARIANT_CALLVARIANTS { + tag "$meta.id" + label 'process_high' ++ label "${params.use_gpu ? 
'process_gpu' : 'process_noaccel'}" + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" + +'modules/nf-core/deepvariant/callvariants/tests/main.nf.test' is unchanged +'modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap' is unchanged +'modules/nf-core/deepvariant/callvariants/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/deepvariant/callvariants/main.nf b/modules/nf-core/deepvariant/callvariants/main.nf index 251f4c73..d9218062 100644 --- a/modules/nf-core/deepvariant/callvariants/main.nf +++ b/modules/nf-core/deepvariant/callvariants/main.nf @@ -2,7 +2,7 @@ process DEEPVARIANT_CALLVARIANTS { tag "$meta.id" label 'process_high' - label 'process_gpu' + label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" //Conda is not supported at the moment container "docker.io/google/deepvariant:1.9.0" diff --git a/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff b/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff new file mode 100644 index 00000000..8e5312b2 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff @@ -0,0 +1,18 @@ +Changes in component 'nf-core/deepvariant/makeexamples' +'modules/nf-core/deepvariant/makeexamples/meta.yml' is unchanged +Changes in 'deepvariant/makeexamples/main.nf': +--- modules/nf-core/deepvariant/makeexamples/main.nf ++++ modules/nf-core/deepvariant/makeexamples/main.nf +@@ -36,6 +36,7 @@ + --mode calling \\ + --ref "${fasta}" \\ + --reads "${input}" \\ ++ --sample_name ${prefix} \\ + --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ + --gvcf "./${prefix}.gvcf.tfrecord@${task.cpus}.gz" \\ + ${regions} \\ + +'modules/nf-core/deepvariant/makeexamples/tests/main.nf.test' is unchanged +'modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap' is unchanged 
+'modules/nf-core/deepvariant/makeexamples/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff b/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff new file mode 100644 index 00000000..c7acc49a --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff @@ -0,0 +1,18 @@ +Changes in component 'nf-core/deepvariant/postprocessvariants' +'modules/nf-core/deepvariant/postprocessvariants/meta.yml' is unchanged +Changes in 'deepvariant/postprocessvariants/main.nf': +--- modules/nf-core/deepvariant/postprocessvariants/main.nf ++++ modules/nf-core/deepvariant/postprocessvariants/main.nf +@@ -64,6 +64,7 @@ + --outfile "${prefix}.vcf.gz" \\ + --nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\ + --gvcf_outfile "${prefix}.g.vcf.gz" \\ ++ --sample_name ${prefix} \\ + ${regions} \\ + ${small_model_arg} \\ + --cpus $task.cpus + +'modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test' is unchanged +'modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap' is unchanged +'modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/severus/main.nf b/modules/nf-core/severus/main.nf index f191fd0b..95fe7912 100644 --- a/modules/nf-core/severus/main.nf +++ b/modules/nf-core/severus/main.nf @@ -20,12 +20,12 @@ process SEVERUS { tuple val(meta), path("${prefix}/severus_collaped_dup.bed") , emit: collapsed_dup , optional: true tuple val(meta), path("${prefix}/severus_LOH.bed") , emit: loh , optional: true tuple val(meta), path("${prefix}/all_SVs/severus_all.vcf.gz") , emit: all_vcf , optional: true - tuple val(meta), path("${prefix}/all_SVs/breakpoints_clusters_list.tsv") , emit: all_breakpoints_clusters_list , 
optional: true - tuple val(meta), path("${prefix}/all_SVs/breakpoints_clusters.tsv") , emit: all_breakpoints_clusters , optional: true + tuple val(meta), path("${prefix}/all_SVs/breakpoint_clusters_list.tsv") , emit: all_breakpoints_clusters_list , optional: true + tuple val(meta), path("${prefix}/all_SVs/breakpoint_clusters.tsv") , emit: all_breakpoints_clusters , optional: true tuple val(meta), path("${prefix}/all_SVs/plots/severus_*.html") , emit: all_plots , optional: true tuple val(meta), path("${prefix}/somatic_SVs/severus_somatic.vcf.gz") , emit: somatic_vcf , optional: true - tuple val(meta), path("${prefix}/somatic_SVs/breakpoints_clusters_list.tsv"), emit: somatic_breakpoints_clusters_list, optional: true - tuple val(meta), path("${prefix}/somatic_SVs/breakpoints_clusters.tsv") , emit: somatic_breakpoints_clusters , optional: true + tuple val(meta), path("${prefix}/somatic_SVs/breakpoint_clusters_list.tsv"), emit: somatic_breakpoints_clusters_list, optional: true + tuple val(meta), path("${prefix}/somatic_SVs/breakpoint_clusters.tsv") , emit: somatic_breakpoints_clusters , optional: true tuple val(meta), path("${prefix}/somatic_SVs/plots/severus_*.html") , emit: somatic_plots , optional: true path "versions.yml" , emit: versions diff --git a/nextflow.config b/nextflow.config index 1cef0a68..2aaf892d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -48,6 +48,7 @@ params { skip_m6a = false skip_vep = false skip_modcall = false + use_gpu = false // minimap2 options minimap2_ont_model = null diff --git a/nextflow_schema.json b/nextflow_schema.json index f22408bc..f6b15390 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -57,13 +57,13 @@ "type": "string", "description": "specifies which germline variants are used for phasing and annotation. 
Must be [consensus, all,deepvariant,clair]", "default": "consensus", - "enum": ["consensus", "all","deepvariant","clair"] + "enum": ["consensus", "all", "deepvariant", "clair"] }, "somatic_var_keep": { "type": "string", "description": "specifies which somatic variants are used for phasing and annotation. Must be [consensus, all,deepvariant,clair]", "default": "all", - "enum": ["consensus", "all", "deepvariant","clair"] + "enum": ["consensus", "all", "deepvariant", "clair"] } } }, @@ -410,6 +410,9 @@ { "$ref": "#/$defs/input_output_options" }, + { + "$ref": "#/$defs/small_variant_calling_options" + }, { "$ref": "#/$defs/reference_genome_options" }, diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 35fb56bf..09c9961f 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2025-12-23T12:58:53+00:00", - "description": "# IntGenomicsLab/lrsomatic\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/IntGenomicsLab/lrsomatic)\n[![GitHub Actions CI Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.17751829-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.17751829)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template 
version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/IntGenomicsLab/lrsomatic)\n\n## Introduction\n\n**IntGenomicsLab/lrsomatic** is a robust bioinformatics pipeline designed for processing and analyzing **somatic DNA sequencing** data for long-read sequencing technologies from **Oxford Nanopore** and **PacBio**. It supports both canonical base DNA and modified base calling, including specialized applications such as **Fiber-seq**.\n\nThis **end-to-end pipeline** handles the entire workflow \u2014 **from raw read processing and alignment, to comprehensive somatic variant calling**, including single nucleotide variants, indels, structural variants, copy number alterations, and modified bases.\n\nIt can be run in both **matched tumour-normal** and **tumour-only mode**, offering flexibility depending on the users study design.\n\nDeveloped using **Nextflow DSL2**, it offers high portability and scalability across diverse computing environments. By leveraging Docker or Singularity containers, installation is streamlined and results are highly reproducible. Each process runs in an isolated container, simplifying dependency management and updates. 
Where applicable, pipeline components are sourced from **nf-core/modules**, promoting reuse, interoperability, and consistency within the broader Nextflow and nf-core ecosystems.\n\n## Pipeline summary\n\n![image](./assets/lrsomatic_1.0.png)\n\n**1) Pre-processing:**\n\na. Raw read QC ([`cramino`](https://github.com/wdecoster/cramino))\n\nb. Alignment to the reference genome ([`minimap2`](https://github.com/lh3/minimap2))\n\nc. Post alignment QC ([`cramino`](https://github.com/wdecoster/cramino), [`samtools idxstats`](https://github.com/samtools/samtools), [`samtools flagstats`](https://github.com/samtools/samtools), [`samtools stats`](https://github.com/samtools/samtools))\n\nd. Specific for calling modified base calling ([`Modkit`](https://github.com/nanoporetech/modkit), [`Fibertools`](https://github.com/fiberseq/fibertools-rs))\n\n**2i) Matched mode: small variant calling:**\n\na. Calling Germline SNPs ([`Clair3`](https://github.com/HKU-BAL/Clair3))\n\nb. Phasing and Haplotagging the SNPs in the normal and tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\nc. Calling somatic SNVs ([`ClairS`](https://github.com/HKU-BAL/ClairS))\n\n**2ii) Tumour only mode: small variant calling:**\n\na. Calling Germline SNPs and somatic SNVs ([`ClairS-TO`](https://github.com/HKU-BAL/ClairS-TO))\n\nb. Phasing and Haplotagging germline SNPs in tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\n**3) Large variant calling:**\n\na. Somatic structural variant calling ([`Severus`](https://github.com/KolmogorovLab/Severus))\n\nb. Copy number alterion calling; long read version of ([`ASCAT`](https://github.com/VanLoo-lab/ascat))\n\n**4) Annotation:**\n\na. Small variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\nb. 
Structural variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst prepare a samplesheet with your input data that looks as follows:\n\n```csv\nsample,bam_tumor,bam_normal,platform,sex,fiber\nsample1,tumour.bam,normal.bam,ont,female,n\nsample2,tumour.bam,,ont,female,y\nsample3,tumour.bam,,pb,male,n\nsample4,tumour.bam,normal.bam,pb,male,y\n```\n\nEach row represents a sample. The bam files should always be unaligned bam files. All fields except for `bam_normal` are required. If `bam_normal` is empty, the pipeline will run in tumour only mode. `platform` should be either `ont` or `pb` for Oxford Nanopore Sequencing or PacBio sequencing, respectively. `sex` refers to the biological sex of the sample and should be either `female` or `male`. Finally, `fiber` specifies whether your sample is Fiber-seq data or not and should have either `y` for Yes or `n` for No.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run IntGenomicsLab/lrsomatic \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\nMore detail is given in our [usage documentation](/docs/usage.md)\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. 
Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nIntGenomicsLab/lr_somatic was originally written by Luuk Harbers, Robert Forsyth, Alexandra Pan\u010d\u00edkov\u00e1, Marios Eftychiou, Ruben Cools, Laurens Lambrechts, and Jonas Demeulemeester.\n\n## Pipeline output\n\nThis pipeline produces a series of different output files. The main output is an aligned and phased tumour bam file. This bam file can be used by any typical downstream tool that uses bam files as input. Furthermore, we have sample-specific QC outputs from `cramino` (fastq), `cramino` (bam), `mosdepth`, `samtools` (stats/flagstat/idxstats), and optionally `fibertools`. Finally, we have a `multiqc` report from that combines the output from `mosdepth` and `samtools` into one html report.\n\nBesides QC and the aligned and phased bam file, we have output from (structural) variant and copy number callers, of which some are optional. The output from these variant callers can be found in their respective folders. For small and structural variant callers (`clairS`, `clairS-TO`, and `severus`) these will contain, among others, `vcf` files with called variants. 
For `ascat` these contain files with final copy number information and plots of the copy number profiles.\n\nExample output directory structure:\n\n```\n\u251c\u2500\u2500 Sample 1\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500clairS-TO\n\u2502 \u2502 \u251c\u2500\u2500severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u2502\n\u251c\u2500\u2500 Sample 2\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u2502 \u251c\u2500\u2500 normal\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500 clair3\n\u2502 \u2502 \u251c\u2500\u2500 clairS\n\u2502 \u2502 \u251c\u2500\u2500 severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u251c\u2500\u2500 pipeline_info\n```\n\nmore detail is given in our [output 
documentation](/docs/output.md)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use IntGenomicsLab/lrsomatic for your analysis, please cite it using the following doi: [10.5281/zenodo.17751829](https://doi.org/10.5281/zenodo.17751829)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "# IntGenomicsLab/lrsomatic\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/IntGenomicsLab/lrsomatic)\n[![GitHub Actions CI Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with 
Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.17751829-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.17751829)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/IntGenomicsLab/lrsomatic)\n\n## Introduction\n\n**IntGenomicsLab/lrsomatic** is a robust bioinformatics pipeline designed for processing and analyzing **somatic DNA sequencing** data for long-read sequencing technologies from **Oxford Nanopore** and **PacBio**. 
It supports both canonical base DNA and modified base calling, including specialized applications such as **Fiber-seq**.\n\nThis **end-to-end pipeline** handles the entire workflow \u2014 **from raw read processing and alignment, to comprehensive somatic variant calling**, including single nucleotide variants, indels, structural variants, copy number alterations, and modified bases.\n\nIt can be run in both **matched tumour-normal** and **tumour-only mode**, offering flexibility depending on the users study design.\n\nDeveloped using **Nextflow DSL2**, it offers high portability and scalability across diverse computing environments. By leveraging Docker or Singularity containers, installation is streamlined and results are highly reproducible. Each process runs in an isolated container, simplifying dependency management and updates. Where applicable, pipeline components are sourced from **nf-core/modules**, promoting reuse, interoperability, and consistency within the broader Nextflow and nf-core ecosystems.\n\n## Pipeline summary\n\n![image](./assets/lrsomatic_1.0.png)\n\n**1) Pre-processing:**\n\na. Raw read QC ([`cramino`](https://github.com/wdecoster/cramino))\n\nb. Alignment to the reference genome ([`minimap2`](https://github.com/lh3/minimap2))\n\nc. Post alignment QC ([`cramino`](https://github.com/wdecoster/cramino), [`samtools idxstats`](https://github.com/samtools/samtools), [`samtools flagstats`](https://github.com/samtools/samtools), [`samtools stats`](https://github.com/samtools/samtools))\n\nd. Specific for calling modified base calling ([`Modkit`](https://github.com/nanoporetech/modkit), [`Fibertools`](https://github.com/fiberseq/fibertools-rs))\n\n**2i) Matched mode: small variant calling:**\n\na. Calling Germline SNPs ([`Clair3`](https://github.com/HKU-BAL/Clair3))\n\nb. Phasing and Haplotagging the SNPs in the normal and tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\nc. 
Calling somatic SNVs ([`ClairS`](https://github.com/HKU-BAL/ClairS))\n\n**2ii) Tumour only mode: small variant calling:**\n\na. Calling Germline SNPs and somatic SNVs ([`ClairS-TO`](https://github.com/HKU-BAL/ClairS-TO))\n\nb. Phasing and Haplotagging germline SNPs in tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\n**3) Large variant calling:**\n\na. Somatic structural variant calling ([`Severus`](https://github.com/KolmogorovLab/Severus))\n\nb. Copy number alterion calling; long read version of ([`ASCAT`](https://github.com/VanLoo-lab/ascat))\n\n**4) Annotation:**\n\na. Small variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\nb. Structural variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst prepare a samplesheet with your input data that looks as follows:\n\n```csv\nsample,bam_tumor,bam_normal,platform,sex,fiber\nsample1,tumour.bam,normal.bam,ont,female,n\nsample2,tumour.bam,,ont,female,y\nsample3,tumour.bam,,pb,male,n\nsample4,tumour.bam,normal.bam,pb,male,y\n```\n\nEach row represents a sample. The bam files should always be unaligned bam files. All fields except for `bam_normal` are required. If `bam_normal` is empty, the pipeline will run in tumour only mode. `platform` should be either `ont` or `pb` for Oxford Nanopore Sequencing or PacBio sequencing, respectively. `sex` refers to the biological sex of the sample and should be either `female` or `male`. 
Finally, `fiber` specifies whether your sample is Fiber-seq data or not and should have either `y` for Yes or `n` for No.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run IntGenomicsLab/lrsomatic \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\nMore detail is given in our [usage documentation](/docs/usage.md)\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nIntGenomicsLab/lr_somatic was originally written by Luuk Harbers, Robert Forsyth, Alexandra Pan\u010d\u00edkov\u00e1, Marios Eftychiou, Ruben Cools, Laurens Lambrechts, and Jonas Demeulemeester.\n\n## Pipeline output\n\nThis pipeline produces a series of different output files. The main output is an aligned and phased tumour bam file. This bam file can be used by any typical downstream tool that uses bam files as input. Furthermore, we have sample-specific QC outputs from `cramino` (fastq), `cramino` (bam), `mosdepth`, `samtools` (stats/flagstat/idxstats), and optionally `fibertools`. Finally, we have a `multiqc` report from that combines the output from `mosdepth` and `samtools` into one html report.\n\nBesides QC and the aligned and phased bam file, we have output from (structural) variant and copy number callers, of which some are optional. The output from these variant callers can be found in their respective folders. For small and structural variant callers (`clairS`, `clairS-TO`, and `severus`) these will contain, among others, `vcf` files with called variants. 
For `ascat` these contain files with final copy number information and plots of the copy number profiles.\n\nExample output directory structure:\n\n```\n\u251c\u2500\u2500 Sample 1\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500clairS-TO\n\u2502 \u2502 \u251c\u2500\u2500severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u2502\n\u251c\u2500\u2500 Sample 2\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u2502 \u251c\u2500\u2500 normal\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500 clair3\n\u2502 \u2502 \u251c\u2500\u2500 clairS\n\u2502 \u2502 \u251c\u2500\u2500 severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u251c\u2500\u2500 pipeline_info\n```\n\nmore detail is given in our [output 
documentation](/docs/output.md)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use `IntGenomicsLab/lrsomatic` for your analysis, please cite it using the following:\n\n> LRSomatic: a highly scalable and robust pipeline for somatic variant calling in long-read sequencing data\n>\n> Robert A. Forsyth*, Luuk Harbers*, Amber Verhasselt, Ana-Luc\u00eda Rocha Iraiz\u00f3s, Sidi Yang, Joris Vande Velde, Christopher Davies, Nischalan Pillay, Laurens Lambrechts, Jonas Demeulemeester\n>\n> bioRxiv 2026.02.26.707772; doi: https://doi.org/10.64898/2026.02.26.707772\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/deepsomatic.nf b/subworkflows/local/deepsomatic.nf index 920795ad..c91ca6af 100644 --- a/subworkflows/local/deepsomatic.nf +++ b/subworkflows/local/deepsomatic.nf @@ -18,7 +18,7 @@ workflow DEEPSOMATIC { // Input to postprocessing step needs both the gvcfs from MAKEEXAMPLES and the variant // calls from CALLVARIANTS. Joining on meta, which is assumed to be unique. 
- + ch_postproc_input = DEEPSOMATIC_CALLVARIANTS.out.call_variants_tfrecords.join( DEEPSOMATIC_MAKEEXAMPLES.out.gvcf, diff --git a/subworkflows/local/paired/paired_smallvar_germline.nf b/subworkflows/local/paired/paired_smallvar_germline.nf index 4ad2d727..2e9f286c 100644 --- a/subworkflows/local/paired/paired_smallvar_germline.nf +++ b/subworkflows/local/paired/paired_smallvar_germline.nf @@ -104,7 +104,7 @@ workflow PAIRED_SMALLVAR_GERMLINE { clair3_ch .mix(deepvariant_ch) .set{combined_germline_ch} - + GERMLINE_CONSENSUS( combined_germline_ch, fasta, diff --git a/subworkflows/local/paired/paired_smallvar_somatic.nf b/subworkflows/local/paired/paired_smallvar_somatic.nf index e7cad038..421864bd 100644 --- a/subworkflows/local/paired/paired_smallvar_somatic.nf +++ b/subworkflows/local/paired/paired_smallvar_somatic.nf @@ -18,7 +18,7 @@ workflow PAIRED_SMALLVAR_SOMATIC { ch_versions = channel.empty() somatic_vcf = channel.empty() somatic_tbi = channel.empty() - + // CLAIRS if(params.somatic_var_keep != 'deepvariant') { tumor_normal_bams @@ -32,13 +32,13 @@ workflow PAIRED_SMALLVAR_SOMATIC { fasta, fai ) - + // CONCAT CLAIRS INDEL AND SNV OUTPUT CLAIRS.out.vcfs .join(CLAIRS.out.tbi) .set{clairs_out} - + BCFTOOLS_CONCAT ( clairs_out ) diff --git a/subworkflows/local/phasing_haplotyping.nf b/subworkflows/local/phasing_haplotyping.nf index b543e3cc..d7987b29 100644 --- a/subworkflows/local/phasing_haplotyping.nf +++ b/subworkflows/local/phasing_haplotyping.nf @@ -1,12 +1,12 @@ // Import modules -include { LONGPHASE_PHASE as LONGPHASE_PHASE_GERMLINE } from '../../modules/nf-core/longphase/phase/main.nf' -include { LONGPHASE_PHASE as LONGPHASE_PHASE_SOMATIC } from '../../modules/nf-core/longphase/phase/main.nf' -include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' -include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_GERMLINE } from '../../modules/local/longphase/modcall/main.nf' -include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_SOMATIC } 
from '../../modules/local/longphase/modcall/main.nf' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' -include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat/main' -include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main' +include { LONGPHASE_PHASE as LONGPHASE_PHASE_GERMLINE } from '../../modules/nf-core/longphase/phase/main.nf' +include { LONGPHASE_PHASE as LONGPHASE_PHASE_SOMATIC } from '../../modules/nf-core/longphase/phase/main.nf' +include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' +include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_GERMLINE } from '../../modules/local/longphase/modcall/main.nf' +include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_SOMATIC } from '../../modules/local/longphase/modcall/main.nf' +include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' +include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main' workflow PHASING_HAPLOTYPING { @@ -18,15 +18,15 @@ workflow PHASING_HAPLOTYPING { fai main: - + // SPLIT INTO PAIRED AND TUMOR ONLY - tumor_normal_bams + tumor_normal_bams .branch { meta, _bams, _bai -> paired: meta.paired_data tumor_only: !meta.paired_data } .set { branched_bams } - + branched_bams.paired .set{ paired_ch } @@ -51,7 +51,7 @@ workflow PHASING_HAPLOTYPING { tumor: meta.type == "tumor" } .set {paired_ch_branched} - + paired_ch_branched.normal .map { meta, bam, bai -> def new_meta = meta.subMap('id', @@ -140,7 +140,7 @@ workflow PHASING_HAPLOTYPING { return [ meta, bam, bai, vcf, svs, mods ] } .set{ longphase_phase_somatic_input_ch } - } + } else { normal_bams_w_tumoronly_ch .join(germline_vcf) @@ -150,7 +150,7 @@ workflow PHASING_HAPLOTYPING { return [ meta, bam, bai, vcf, svs, mods ] } .set{ longphase_phase_germline_input_ch } - + tumor_bams_ch .join(germline_somatic_vcfs) 
.join(LONGPHASE_MODCALL_SOMATIC.out.mod_vcf) @@ -171,7 +171,7 @@ workflow PHASING_HAPLOTYPING { LONGPHASE_PHASE_GERMLINE.out.snv_vcf .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf_index) .set{ phased_germline_vcf } - + LONGPHASE_PHASE_SOMATIC ( longphase_phase_somatic_input_ch, fasta, @@ -185,7 +185,7 @@ workflow PHASING_HAPLOTYPING { // HAPLOTAGING // remove type for merging - + if(!params.skip_modcall) { LONGPHASE_MODCALL_GERMLINE.out.mod_vcf @@ -209,30 +209,30 @@ workflow PHASING_HAPLOTYPING { .map { meta, bam, bai, vcf, mods -> def new_meta = meta + [type : "tumor"] def svs = [] - return [new_meta, bam, bai, vcf, svs, mods] + return [new_meta, bam, bai, vcf, svs, mods] } .set{ tumor_only_ch } - + paired_tumor_ch .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .join(modcall_vcf_ch) .map { meta, bam, bai, vcf, mods -> def new_meta = meta + [type : "tumor"] def svs = [] - return [new_meta, bam, bai, vcf, svs, mods] + return [new_meta, bam, bai, vcf, svs, mods] } .set{ paired_tumor_ch } - + paired_normal_ch .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .join(modcall_vcf_ch) .map { meta, bam, bai, vcf, mods -> def new_meta = meta + [type : "normal"] def svs = [] - return [new_meta, bam, bai, vcf, svs, mods] + return [new_meta, bam, bai, vcf, svs, mods] } .set{ paired_normal_ch } - + } else { @@ -242,27 +242,27 @@ workflow PHASING_HAPLOTYPING { def new_meta = meta + [type : "tumor"] def svs = [] def mods = [] - return [new_meta, bam, bai, vcf, svs, mods] + return [new_meta, bam, bai, vcf, svs, mods] } .set{ tumor_only_ch } - + paired_tumor_ch .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .map { meta, bam, bai, vcf -> def new_meta = meta + [type : "tumor"] def svs = [] def mods = [] - return [new_meta, bam, bai, vcf, svs, mods] + return [new_meta, bam, bai, vcf, svs, mods] } .set{ paired_tumor_ch } - + paired_normal_ch .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .map { meta, bam, bai, vcf -> def new_meta = meta + [type : "normal"] def svs = [] def mods = [] - return [new_meta, 
bam, bai, vcf, svs, mods] + return [new_meta, bam, bai, vcf, svs, mods] } .set{ paired_normal_ch } @@ -293,4 +293,4 @@ workflow PHASING_HAPLOTYPING { emit: tumor_normal_hapbams_ch phased_germline_vcf -} \ No newline at end of file +} diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf index 823c38c9..4665d4bb 100644 --- a/subworkflows/local/small_variant_consensus.nf +++ b/subworkflows/local/small_variant_consensus.nf @@ -19,7 +19,7 @@ workflow SMALL_VARIANT_CONSENSUS { //normalize VCFs BCFTOOLS_NORM(mixed_vcfs, fasta) - BCFTOOLS_NORM.out.vcf + BCFTOOLS_NORM.out.vcf .join(BCFTOOLS_NORM.out.tbi) .set {normalized_vcfs} @@ -52,7 +52,7 @@ workflow SMALL_VARIANT_CONSENSUS { clair_ch = annotated_vcfs_branched.clair deepvariant_ch = annotated_vcfs_branched.deepvariant - + clair_ch. map {meta, vcfs, tbi -> def new_meta = meta.subMap('id', @@ -118,11 +118,11 @@ workflow SMALL_VARIANT_CONSENSUS { BCFTOOLS_ISEC.out.clair_consensus_tbi .set{tbi} } - + } else if (var_keep_method == 'all'){ - + mixed_vcfs .map{ meta, vcfs, tbis -> def file = [] @@ -131,7 +131,7 @@ workflow SMALL_VARIANT_CONSENSUS { return [meta, vcfs, tbis, file, target, regions] } .set{isec_input} - + BCFTOOLS_ISEC(isec_input) if (params.trust_caller == 'deepvariant') { @@ -172,4 +172,4 @@ workflow SMALL_VARIANT_CONSENSUS { vcf tbi -} \ No newline at end of file +} diff --git a/subworkflows/local/tumor_only/tumoronly_smallvar.nf b/subworkflows/local/tumor_only/tumoronly_smallvar.nf index 7db49724..6fe9de8a 100644 --- a/subworkflows/local/tumor_only/tumoronly_smallvar.nf +++ b/subworkflows/local/tumor_only/tumoronly_smallvar.nf @@ -60,7 +60,7 @@ workflow TUMORONLY_SMALLVAR { return [ new_meta, vcf, tbi] } .set{clairsto_germline_ch} - + VCFSPLIT.out.somatic_vcf .join(VCFSPLIT.out.somatic_tbi) .map { meta, vcf, tbi -> @@ -85,7 +85,7 @@ workflow TUMORONLY_SMALLVAR { [[:],[]], [[:],[]] ) - + DEEPVARIANT.out.vcf .join(DEEPVARIANT.out.vcf_index) @@ -202,6 +202,6 
@@ workflow TUMORONLY_SMALLVAR { emit: somatic_vcf germline_vcf - - -} \ No newline at end of file + + +} diff --git a/subworkflows/nf-core/deepvariant/deepvariant.diff b/subworkflows/nf-core/deepvariant/deepvariant.diff new file mode 100644 index 00000000..691bc284 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/deepvariant.diff @@ -0,0 +1,23 @@ +Changes in component 'nf-core/deepvariant' +'subworkflows/nf-core/deepvariant/README.md' is unchanged +'subworkflows/nf-core/deepvariant/meta.yml' is unchanged +Changes in 'deepvariant/main.nf': +--- subworkflows/nf-core/deepvariant/main.nf ++++ subworkflows/nf-core/deepvariant/main.nf +@@ -6,7 +6,7 @@ + take: + ch_input // channel: [ val(meta), path(input), path(index), path(intervals)] + ch_fasta // channel: [ val(meta2), path(fasta) ] +- ch_fai // channel: [ val(meta3), path(fail) ] ++ ch_fai // channel: [ val(meta3), path(fai) ] + ch_gzi // channel: [ val(meta4), path(gzi) ] + ch_par_bed // channel: [ val(meta5), path(par_bed) ] + + +'subworkflows/nf-core/deepvariant/tests/main.nf.test' is unchanged +'subworkflows/nf-core/deepvariant/tests/equality.nf.test' is unchanged +'subworkflows/nf-core/deepvariant/tests/disable-small-model.conf' is unchanged +'subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf' is unchanged +'subworkflows/nf-core/deepvariant/tests/main.nf.test.snap' is unchanged +'subworkflows/nf-core/deepvariant/tests/nextflow.config' is unchanged +************************************************************ diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 778a5df1..375af10f 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -76,6 +76,9 @@ "SAMTOOLS_IDXSTATS": { "samtools": "1.22.1" }, + "SAMTOOLS_INDEX": { + "samtools": "1.22.1" + }, "SAMTOOLS_STATS": { "samtools": "1.22.1" }, @@ -109,10 +112,6 @@ } }, [ - "longphase", - "longphase/sample1.vcf", - "longphase/sample2.vcf", - "longphase/sample3.vcf", "multiqc", 
"multiqc/multiqc_data", "multiqc/multiqc_data/llms-full.txt", @@ -188,6 +187,11 @@ "pipeline_info", "pipeline_info/lrsomatic_software_mqc_versions.yml", "sample1", + "sample1/bamfiles", + "sample1/bamfiles/sample1_normal.bam", + "sample1/bamfiles/sample1_normal.bam.bai", + "sample1/bamfiles/sample1_tumor.bam", + "sample1/bamfiles/sample1_tumor.bam.bai", "sample1/qc", "sample1/qc/normal", "sample1/qc/normal/cramino_aln", @@ -261,8 +265,6 @@ "sample1/variants/clairs/snvs.vcf.gz", "sample1/variants/clairs/snvs.vcf.gz.tbi", "sample1/variants/deepsomatic", - "sample1/variants/deepsomatic/sample1.g.vcf.gz", - "sample1/variants/deepsomatic/sample1.g.vcf.gz.tbi", "sample1/variants/deepsomatic/sample1.vcf.gz", "sample1/variants/deepsomatic/sample1.vcf.gz.tbi", "sample1/variants/deepvariant", @@ -271,18 +273,26 @@ "sample1/variants/deepvariant/sample1.vcf.gz", "sample1/variants/deepvariant/sample1.vcf.gz.tbi", "sample1/variants/phased", + "sample1/variants/phased/germline_smallvariants.vcf.gz", + "sample1/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample1/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample1/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", "sample1/variants/phased/somatic_smallvariants.vcf.gz", "sample1/variants/phased/somatic_smallvariants.vcf.gz.tbi", "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz", "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample1/variants/severus", "sample1/variants/severus/all_SVs", + "sample1/variants/severus/all_SVs/breakpoint_clusters.tsv", + "sample1/variants/severus/all_SVs/breakpoint_clusters_list.tsv", "sample1/variants/severus/all_SVs/severus_all.vcf.gz", "sample1/variants/severus/breakpoints_double.csv", "sample1/variants/severus/read_ids.csv", "sample1/variants/severus/read_qual.txt", "sample1/variants/severus/severus.log", "sample1/variants/severus/somatic_SVs", + "sample1/variants/severus/somatic_SVs/breakpoint_clusters.tsv", + 
"sample1/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", "sample1/variants/severus/somatic_SVs/severus_somatic.vcf.gz", "sample1/vep", "sample1/vep/SVs", @@ -298,6 +308,11 @@ "sample1/vep/somatic/sample1_SOMATIC_VEP.vcf.gz.tbi", "sample1/vep/somatic/sample1_SOMATIC_VEP.vcf.gz_summary.html", "sample2", + "sample2/bamfiles", + "sample2/bamfiles/sample2_normal.bam", + "sample2/bamfiles/sample2_normal.bam.bai", + "sample2/bamfiles/sample2_tumor.bam", + "sample2/bamfiles/sample2_tumor.bam.bai", "sample2/qc", "sample2/qc/normal", "sample2/qc/normal/cramino_aln", @@ -371,8 +386,6 @@ "sample2/variants/clairs/snvs.vcf.gz", "sample2/variants/clairs/snvs.vcf.gz.tbi", "sample2/variants/deepsomatic", - "sample2/variants/deepsomatic/sample2.g.vcf.gz", - "sample2/variants/deepsomatic/sample2.g.vcf.gz.tbi", "sample2/variants/deepsomatic/sample2.vcf.gz", "sample2/variants/deepsomatic/sample2.vcf.gz.tbi", "sample2/variants/deepvariant", @@ -381,18 +394,26 @@ "sample2/variants/deepvariant/sample2.vcf.gz", "sample2/variants/deepvariant/sample2.vcf.gz.tbi", "sample2/variants/phased", + "sample2/variants/phased/germline_smallvariants.vcf.gz", + "sample2/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample2/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample2/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", "sample2/variants/phased/somatic_smallvariants.vcf.gz", "sample2/variants/phased/somatic_smallvariants.vcf.gz.tbi", "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz", "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample2/variants/severus", "sample2/variants/severus/all_SVs", + "sample2/variants/severus/all_SVs/breakpoint_clusters.tsv", + "sample2/variants/severus/all_SVs/breakpoint_clusters_list.tsv", "sample2/variants/severus/all_SVs/severus_all.vcf.gz", "sample2/variants/severus/breakpoints_double.csv", "sample2/variants/severus/read_ids.csv", "sample2/variants/severus/read_qual.txt", 
"sample2/variants/severus/severus.log", "sample2/variants/severus/somatic_SVs", + "sample2/variants/severus/somatic_SVs/breakpoint_clusters.tsv", + "sample2/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", "sample2/variants/severus/somatic_SVs/severus_somatic.vcf.gz", "sample2/vep", "sample2/vep/SVs", @@ -408,6 +429,9 @@ "sample2/vep/somatic/sample2_SOMATIC_VEP.vcf.gz.tbi", "sample2/vep/somatic/sample2_SOMATIC_VEP.vcf.gz_summary.html", "sample3", + "sample3/bamfiles", + "sample3/bamfiles/sample3_tumor.bam", + "sample3/bamfiles/sample3_tumor.bam.bai", "sample3/qc", "sample3/qc/tumor", "sample3/qc/tumor/cramino_aln", @@ -451,8 +475,6 @@ "sample3/variants/clairsto/somatic.vcf.gz", "sample3/variants/clairsto/somatic.vcf.gz.tbi", "sample3/variants/deepsomatic", - "sample3/variants/deepsomatic/sample3.g.vcf.gz", - "sample3/variants/deepsomatic/sample3.g.vcf.gz.tbi", "sample3/variants/deepsomatic/sample3.vcf.gz", "sample3/variants/deepsomatic/sample3.vcf.gz.tbi", "sample3/variants/deepvariant", @@ -461,18 +483,26 @@ "sample3/variants/deepvariant/sample3.vcf.gz", "sample3/variants/deepvariant/sample3.vcf.gz.tbi", "sample3/variants/phased", + "sample3/variants/phased/germline_smallvariants.vcf.gz", + "sample3/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample3/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample3/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", "sample3/variants/phased/somatic_smallvariants.vcf.gz", "sample3/variants/phased/somatic_smallvariants.vcf.gz.tbi", "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz", "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample3/variants/severus", "sample3/variants/severus/all_SVs", + "sample3/variants/severus/all_SVs/breakpoint_clusters.tsv", + "sample3/variants/severus/all_SVs/breakpoint_clusters_list.tsv", "sample3/variants/severus/all_SVs/severus_all.vcf.gz", "sample3/variants/severus/breakpoints_double.csv", "sample3/variants/severus/read_ids.csv", 
"sample3/variants/severus/read_qual.txt", "sample3/variants/severus/severus.log", "sample3/variants/severus/somatic_SVs", + "sample3/variants/severus/somatic_SVs/breakpoint_clusters.tsv", + "sample3/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", "sample3/variants/severus/somatic_SVs/severus_somatic.vcf.gz", "sample3/vep", "sample3/vep/SVs", @@ -489,60 +519,73 @@ "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz_summary.html" ], [ - "sample1.vcf:md5,26cef7c7c05a6d2e076aeb0e0bef7fe2", - "sample2.vcf:md5,e50e314896682852a973f1f9236f908c", - "sample3.vcf:md5,009e8d6ef6736c8f7760d3cbbbf91f73", + "sample1_normal.bam:md5,92a00e311e085a34d443cb64694ce839", + "sample1_normal.bam.bai:md5,c7dff8adc4c8d33a81fb8ea7dff4a98e", + "sample1_tumor.bam:md5,c6c79808f928393b23ae53976a7304c3", + "sample1_tumor.bam.bai:md5,317d711c0c26d2cfb933ad53b69da1d7", "sample1.flagstat:md5,1c41ea9923945501eb7e41f83a90502d", "sample1.idxstats:md5,902e503387799123ea59255e3fca172c", "sample1.stats:md5,70fabbdc07dec0479b3fc7dcec344054", "sample1.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample1.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", "sample1.stats:md5,5012c82d3d3ca60ffdd2fb970f772566", - "sample1.g.vcf.gz:md5,e9c0e1c7f90d334faccab6b854611643", - "sample1.g.vcf.gz.tbi:md5,6110e6f6ab72dbc0bee604afe690893f", "sample1.vcf.gz:md5,92c0fa9016c3d8b192eb382fd6a81199", "sample1.vcf.gz.tbi:md5,04e82577dc57f80b9db25897389364b2", "sample1.g.vcf.gz:md5,5ed06f35ccecf7aadbec54873dc07e64", "sample1.g.vcf.gz.tbi:md5,ca628ef368d34a7a6c77098a4c4bdf36", "sample1.vcf.gz:md5,ad971a535d2b8014fabdabe72995a5db", "sample1.vcf.gz.tbi:md5,65de5caace8d4312afa707c3bfd6fa45", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", "breakpoints_double.csv:md5,d63f058075bfe791248954ca2ee6c4fb", "read_qual.txt:md5,78247dfa2ea336eac0e128eba5e9eef4", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + 
"breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "sample2_normal.bam:md5,ed6aab4d2ef70537390a0ce85276ad2c", + "sample2_normal.bam.bai:md5,fb92c167c63e7f62d0c4c3fea8bd1b60", + "sample2_tumor.bam:md5,10c29ced5ed253731ca50097d9c848e3", + "sample2_tumor.bam.bai:md5,e0aa6d8d594070e753145503520fffab", "sample2.flagstat:md5,714d0cc0c213e2640e54a16f3d0e6e7e", "sample2.idxstats:md5,72eb83bb11748dc863fef1a0a5497e4b", "sample2.stats:md5,87cb6e9adf8a133244e8b331be43bb14", "sample2.flagstat:md5,4344a8745efef9cc2a017024218d61c6", "sample2.idxstats:md5,69467fc02c83a30084736aeea8b785fb", "sample2.stats:md5,1e044857eeefb284fda88ee58ff7a04a", - "sample2.g.vcf.gz:md5,1cb2d7bf929e4e87a8591c11bf5034c7", - "sample2.g.vcf.gz.tbi:md5,67674d00b976135fe29f318067a01f7a", "sample2.vcf.gz:md5,f9d6266ee49c2544dc28eef0cdd9dcae", "sample2.vcf.gz.tbi:md5,433ecd219947787d00dd0fada6307661", "sample2.g.vcf.gz:md5,7998d15ce582ec94e86568e3fae654f9", "sample2.g.vcf.gz.tbi:md5,9498e3266900eee2448ff05b0bce87c0", "sample2.vcf.gz:md5,33330d8444a1774864883b33e1e2235c", "sample2.vcf.gz.tbi:md5,384b8b5ea7c1559b464031f8e0863532", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", "breakpoints_double.csv:md5,a68d7fc9c7a7cb2f31e73189c5412f7b", "read_qual.txt:md5,8b92ff7dc4536188be159b95525511cd", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "sample3_tumor.bam:md5,5020a416186da412d9e89f7efac64178", + "sample3_tumor.bam.bai:md5,fc26fd7d9b388e6551898aacf5ed0c7b", "sample3.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample3.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", "sample3.stats:md5,d7a8552a8a41a217954a0c825d468a60", - "sample3.g.vcf.gz:md5,2392430290a5d6a9a43b359faebc025b", - "sample3.g.vcf.gz.tbi:md5,4349b51070a4274707b94ef9b4d8c0bd", "sample3.vcf.gz:md5,ee4f7a5bbd471c1370accf888b8262c0", 
"sample3.vcf.gz.tbi:md5,944349ec46ecc95bab7db2f6848d0c36", "sample3.g.vcf.gz:md5,64adc407ef2434254e76763b72c67b08", "sample3.g.vcf.gz.tbi:md5,ac0397f7a840cf618d56f097defba878", "sample3.vcf.gz:md5,9c52a803d796157a08b565c653d4685c", "sample3.vcf.gz.tbi:md5,f0b3599e4281a6f3bbb8bc408740e58e", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", "breakpoints_double.csv:md5,46e03bf2d67aa736b599f00fe7f01e06", - "read_qual.txt:md5,b918430d35354dad1d7f02f21e4cd4ed" + "read_qual.txt:md5,b918430d35354dad1d7f02f21e4cd4ed", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50" ] ], "meta": { "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-23T16:14:08.943804849" + "timestamp": "2026-03-24T11:55:01.760175088" } } \ No newline at end of file diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index add2d641..8f377a91 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -93,7 +93,7 @@ workflow LRSOMATIC { params.bed_file = getGenomeAttribute('bed_file') params.vep_genome = getGenomeAttribute('vep_genome') params.vep_species = getGenomeAttribute('vep_species') - + if (params.pons_vcfs != null) { pon_files = params.pon_vcfs.collect { file(it) } pon_flags = params.pon_flags @@ -397,7 +397,7 @@ workflow LRSOMATIC { .join(MINIMAP2_ALIGN.out.index) .set {ch_index_minimap} - ch_index_minimap + ch_index_minimap .branch { meta, _bams, _bais -> paired: meta.paired_data tumor_only: !meta.paired_data @@ -466,7 +466,7 @@ workflow LRSOMATIC { ch_fai, downloaded_clair3_models ) - + PAIRED_SMALLVAR_GERMLINE.out.germline_vcf .mix(TUMORONLY_SMALLVAR.out.germline_vcf) .set{ch_germline_vcf} @@ -484,23 +484,23 @@ workflow LRSOMATIC { ch_fai ) - + ch_somatic_vcf .map { meta, vcf, _tbi -> def extra = [] return [meta, vcf, extra] } .set { somatic_vep } - + ch_germline_vcf .map { meta, vcf, _tbi -> 
def extra = [] return [meta, vcf, extra] } .set { germline_vep } - + /// figure out severus channel structure then test - + // [meta, vcf, []] -- somatic variants merged from T/N and tumor-only paths if (!params.skip_vep) { @@ -567,7 +567,7 @@ workflow LRSOMATIC { .mix(somatic_smallvar_input) .join(PHASING_HAPLOTYPING.out.phased_germline_vcf) .set{severus_input} - + // // MODULE: SEVERUS // From e098f3a93a6a4d5b6716688e60163368cc50c608 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 13:24:18 +0100 Subject: [PATCH 18/36] Update modules/local/vcfsplit/main.nf Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- modules/local/vcfsplit/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/vcfsplit/main.nf b/modules/local/vcfsplit/main.nf index 5c9bb05f..9f5759d8 100644 --- a/modules/local/vcfsplit/main.nf +++ b/modules/local/vcfsplit/main.nf @@ -31,8 +31,8 @@ process VCFSPLIT { bcftools concat -a -Oz -o somatic.vcf.gz indels_pass.vcf.gz snv_pass.vcf.gz tabix -p vcf somatic.vcf.gz - bcftools view -i 'FILTER="NonSomatic" | Verdict_Germline' $indel_vcf | bgzip -c > indels_filtered.vcf.gz - bcftools view -i 'FILTER="NonSomatic" | Verdict_Germline' $snv_vcf | bgzip -c > snv_filtered.vcf.gz + bcftools view -i 'FILTER=="NonSomatic" || FILTER=="Verdict_Germline"' $indel_vcf | bgzip -c > indels_filtered.vcf.gz + bcftools view -i 'FILTER=="NonSomatic" || FILTER=="Verdict_Germline"' $snv_vcf | bgzip -c > snv_filtered.vcf.gz tabix -p vcf indels_filtered.vcf.gz tabix -p vcf snv_filtered.vcf.gz bcftools concat -a -Oz -o germline_tmp.vcf.gz indels_filtered.vcf.gz snv_filtered.vcf.gz From abf274fc0516b937408817c2454704683d401116 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 13:33:21 +0100 Subject: [PATCH 19/36] Update modules/nf-core/bcftools/isec/main.nf Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- modules/nf-core/bcftools/isec/main.nf | 4 ++++ 1 file 
changed, 4 insertions(+) diff --git a/modules/nf-core/bcftools/isec/main.nf b/modules/nf-core/bcftools/isec/main.nf index 2bbf6263..cda1a662 100644 --- a/modules/nf-core/bcftools/isec/main.nf +++ b/modules/nf-core/bcftools/isec/main.nf @@ -52,5 +52,9 @@ process BCFTOOLS_ISEC { touch ${prefix}/0000.vcf.gz.tbi echo "" | gzip > ${prefix}/0001.vcf.gz touch ${prefix}/0001.vcf.gz.tbi + echo "" | gzip > ${prefix}/0002.vcf.gz + touch ${prefix}/0002.vcf.gz.tbi + echo "" | gzip > ${prefix}/0003.vcf.gz + touch ${prefix}/0003.vcf.gz.tbi """ } From c932ea45f54f1bdddea891c1df8784b1ed5cf680 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 14:31:20 +0100 Subject: [PATCH 20/36] small fixes --- modules.json | 156 +++-- .../bcftools/query/bcftools-query.diff | 11 +- modules/nf-core/bcftools/query/main.nf | 4 +- nextflow.config | 3 - subworkflows/local/phasing_haplotyping.nf | 28 +- .../local/tumor_only/tumoronly_smallvar.nf | 4 +- tests/default.nf.test.snap | 605 +----------------- workflows/lrsomatic.nf | 6 +- 8 files changed, 153 insertions(+), 664 deletions(-) diff --git a/modules.json b/modules.json index b774fa63..8db781ed 100644 --- a/modules.json +++ b/modules.json @@ -8,180 +8,248 @@ "ascat": { "branch": "master", "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/ascat/ascat.diff" }, "bcftools/annotate": { "branch": "master", "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/concat": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", - "installed_by": ["modules", "vcf_gather_bcftools"] + "installed_by": [ + "modules", + "vcf_gather_bcftools" + ] }, "bcftools/isec": { "branch": "master", "git_sha": "3b2c3559699a7bca6a7c2b220695a072e030e17d", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": 
"modules/nf-core/bcftools/isec/bcftools-isec.diff" }, "bcftools/merge": { "branch": "master", "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/merge/bcftools-merge.diff" }, "bcftools/norm": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "bcftools/query": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/bcftools/query/bcftools-query.diff" }, "bcftools/sort": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", - "installed_by": ["modules", "vcf_gather_bcftools"], + "installed_by": [ + "modules", + "vcf_gather_bcftools" + ], "patch": "modules/nf-core/bcftools/sort/bcftools-sort.diff" }, "deepvariant/callvariants": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": ["deepvariant"], + "installed_by": [ + "deepvariant" + ], "patch": "modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff" }, "deepvariant/makeexamples": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": ["deepvariant"], + "installed_by": [ + "deepvariant" + ], "patch": "modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff" }, "deepvariant/postprocessvariants": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": ["deepvariant"], + "installed_by": [ + "deepvariant" + ], "patch": "modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff" }, "ensemblvep/download": { "branch": "master", "git_sha": "90cdd21fd96ccbdb3bc90797ca69570d18391055", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "ensemblvep/vep": { "branch": "master", "git_sha": 
"890fdcff71928fc1470d3e669d4c430c8c770297", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff" }, "longphase/haplotag": { "branch": "master", "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "longphase/phase": { "branch": "master", "git_sha": "47983538e45e539f783ed8ab0d1c96d39df2af8f", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/longphase/phase/longphase-phase.diff" }, "minimap2/align": { "branch": "master", "git_sha": "5c9f8d5b7671237c906abadc9ff732b301ca15ca", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, "minimap2/index": { "branch": "master", "git_sha": "14980f759266eec42dac401fcafeb83d6c957b41", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "modkit/pileup": { "branch": "master", "git_sha": "3d81317a30d1016b533982d6b84df07713ae520a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "mosdepth": { "branch": "master", "git_sha": "6832b69ef7f98c54876d6436360b6b945370c615", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "multiqc": { "branch": "master", "git_sha": "2c73cc8fa92cf48de3da0b643fdf357a8a290b36", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "nanoplot": { "branch": "master", "git_sha": "682f789f93070bd047868300dd018faf3d434e7c", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "pigz/uncompress": { "branch": "master", "git_sha": "f84336b7fa91a65aa61d215b8c109fbb8e4b4ac6", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/cat": { "branch": "master", "git_sha": "f9edc59be2fe25bb6fc73ca4dfc0d28246f2a2d6", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/faidx": { "branch": "master", "git_sha": 
"b2e78932ef01165fd85829513eaca29eff8e640a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/flagstat": { "branch": "master", "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", - "installed_by": ["bam_stats_samtools"] + "installed_by": [ + "bam_stats_samtools" + ] }, "samtools/idxstats": { "branch": "master", "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", - "installed_by": ["bam_stats_samtools"] + "installed_by": [ + "bam_stats_samtools" + ] }, "samtools/index": { "branch": "master", "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "samtools/stats": { "branch": "master", "git_sha": "fe93fde0845f907fc91ad7cc7d797930408824df", - "installed_by": ["bam_stats_samtools"], + "installed_by": [ + "bam_stats_samtools" + ], "patch": "modules/nf-core/samtools/stats/samtools-stats.diff" }, "severus": { "branch": "master", "git_sha": "4dd9d8439a429c7ee566e0e2347f76ddeef27e66", - "installed_by": ["modules"], + "installed_by": [ + "modules" + ], "patch": "modules/nf-core/severus/severus.diff" }, "untar": { "branch": "master", "git_sha": "447f7bc0fa41dfc2400c8cad4c0291880dc060cf", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "unzip": { "branch": "master", "git_sha": "4dd9d8439a429c7ee566e0e2347f76ddeef27e66", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "wget": { "branch": "master", "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] }, "whatshap/stats": { "branch": "master", "git_sha": "bfab71f4d68c1aaff09335a3433e7b2836918b2a", - "installed_by": ["modules"] + "installed_by": [ + "modules" + ] } } }, @@ -190,31 +258,41 @@ "bam_stats_samtools": { "branch": "master", "git_sha": "7ac6cbe7c17c2dad685da7f70496c8f48ea48687", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "deepvariant": { "branch": "master", "git_sha": 
"f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": ["subworkflows"], + "installed_by": [ + "subworkflows" + ], "patch": "subworkflows/nf-core/deepvariant/deepvariant.diff" }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "65f5e638d901a51534c68fd5c1c19e8112fb4df1", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", - "installed_by": ["subworkflows"] + "installed_by": [ + "subworkflows" + ] } } } } } -} +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/bcftools-query.diff b/modules/nf-core/bcftools/query/bcftools-query.diff index 12ebaeaa..6ad99032 100644 --- a/modules/nf-core/bcftools/query/bcftools-query.diff +++ b/modules/nf-core/bcftools/query/bcftools-query.diff @@ -13,7 +13,7 @@ Changes in 'bcftools/query/main.nf': tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools when: -@@ -35,6 +36,8 @@ +@@ -35,12 +36,16 @@ ${args} \\ ${vcf} \\ > ${prefix}.${suffix} @@ -22,6 +22,15 @@ Changes in 'bcftools/query/main.nf': """ stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ +- touch ${prefix}.${suffix} \\ ++ touch ${prefix}.${suffix} ++ bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz ++ tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz + """ + } 'modules/nf-core/bcftools/query/environment.yml' is unchanged 'modules/nf-core/bcftools/query/tests/main.nf.test' is unchanged diff --git a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf index 996214f1..168a2ad9 100644 --- a/modules/nf-core/bcftools/query/main.nf +++ b/modules/nf-core/bcftools/query/main.nf @@ 
-44,6 +44,8 @@ process BCFTOOLS_QUERY { def prefix = task.ext.prefix ?: "${meta.id}" suffix = task.ext.suffix ?: "txt" """ - touch ${prefix}.${suffix} \\ + touch ${prefix}.${suffix} + bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz + tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz """ } diff --git a/nextflow.config b/nextflow.config index 2fcdce03..f8ad334a 100644 --- a/nextflow.config +++ b/nextflow.config @@ -45,12 +45,9 @@ params { skip_normalfiber = false skip_m6a = false skip_vep = false -<<<<<<< HEAD skip_modcall = false use_gpu = false -======= skip_whatshapstats = false ->>>>>>> 898570c5a6e2f0cc011d892b4a9654a63c2b57c0 // minimap2 options minimap2_ont_model = null diff --git a/subworkflows/local/phasing_haplotyping.nf b/subworkflows/local/phasing_haplotyping.nf index d7987b29..935be201 100644 --- a/subworkflows/local/phasing_haplotyping.nf +++ b/subworkflows/local/phasing_haplotyping.nf @@ -106,18 +106,7 @@ workflow PHASING_HAPLOTYPING { ) } - // PHASING - if (!params.skip_modcall) { - normal_bams_w_tumoronly_ch - .join(germline_vcf) - .join(LONGPHASE_MODCALL_GERMLINE.out.mod_vcf) - .map { meta, bam, bai, vcf, _tbi, mods-> - def svs = [] - return [ meta, bam, bai, vcf, svs, mods ] - } - .set{ longphase_phase_germline_input_ch } - - germline_vcf + germline_vcf .join(somatic_vcf) .map { meta, germline_vcf, germline_tbi, somatic_vcf, somatic_tbi -> def vcfs = [somatic_vcf, germline_vcf] @@ -132,6 +121,17 @@ workflow PHASING_HAPLOTYPING { BCFTOOLS_SORT.out.vcf .set{germline_somatic_vcfs} + // PHASING + if (!params.skip_modcall) { + normal_bams_w_tumoronly_ch + .join(germline_vcf) + .join(LONGPHASE_MODCALL_GERMLINE.out.mod_vcf) + .map { meta, bam, bai, vcf, _tbi, mods-> + def svs = [] + return [ meta, bam, bai, vcf, svs, mods ] + } + .set{ longphase_phase_germline_input_ch } + tumor_bams_ch .join(germline_somatic_vcfs) .join(LONGPHASE_MODCALL_SOMATIC.out.mod_vcf) @@ -153,7 +153,6 @@ workflow PHASING_HAPLOTYPING { tumor_bams_ch .join(germline_somatic_vcfs) - 
.join(LONGPHASE_MODCALL_SOMATIC.out.mod_vcf) .map { meta, bam, bai, vcf -> def svs = [] def mods = [] @@ -180,7 +179,7 @@ workflow PHASING_HAPLOTYPING { LONGPHASE_PHASE_SOMATIC.out.snv_vcf .join(LONGPHASE_PHASE_SOMATIC.out.snv_vcf_index) - .set{ phased_germline_vcf } + .set{ phased_somatic_vcf } // HAPLOTAGING // remove type for merging @@ -293,4 +292,5 @@ workflow PHASING_HAPLOTYPING { emit: tumor_normal_hapbams_ch phased_germline_vcf + phased_somatic_vcf } diff --git a/subworkflows/local/tumor_only/tumoronly_smallvar.nf b/subworkflows/local/tumor_only/tumoronly_smallvar.nf index 6fe9de8a..2dab2726 100644 --- a/subworkflows/local/tumor_only/tumoronly_smallvar.nf +++ b/subworkflows/local/tumor_only/tumoronly_smallvar.nf @@ -97,7 +97,7 @@ workflow TUMORONLY_SMALLVAR { } // COMBINE GERMLINE VARIANTS - if (params.germline_var_keep != 'clair' | params.germline_var_keep != 'deepvariant' ) { + if (params.germline_var_keep != 'clair' && params.germline_var_keep != 'deepvariant' ) { clairsto_germline_ch .mix(deepvariant_ch) .set{combined_germline_ch} @@ -146,7 +146,7 @@ workflow TUMORONLY_SMALLVAR { .set{deepsomatic_ch} } // COMBINE SOMATIC VARIATION - if (params.somatic_var_keep != 'clair' | params.somatic_var_keep != 'deepvariant' ) { + if (params.somatic_var_keep != 'clair' && params.somatic_var_keep != 'deepvariant' ) { clairsto_somatic_ch .mix(deepsomatic_ch) .set{combined_somatic_ch} diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 4d678311..7f3ef224 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,615 +1,18 @@ { "-profile test": { "content": [ - { - "BCFTOOLS_ANNOTATE": { - "bcftools": 1.22 - }, - "BCFTOOLS_CONCAT": { - "bcftools": 1.22 - }, - "BCFTOOLS_ISEC": { - "bcftools": 1.22 - }, - "BCFTOOLS_NORM": { - "bcftools": 1.22 - }, - "BCFTOOLS_QUERY": { - "bcftools": 1.22 - }, - "BCFTOOLS_SORT": { - "bcftools": 1.22 - }, - "CLAIR3": { - "clair3": "1.2.0" - }, - "CLAIRS": { - "clairs": "0.4.4" - }, - "CLAIRSTO": { 
- "clairsto": "0.4.2" - }, - "CRAMINO_POST": { - "cramino": "1.3.0" - }, - "CRAMINO_PRE": { - "cramino": "1.3.0" - }, - "DEEPSOMATIC_CALLVARIANTS": { - "deepsomatic": "1.7.0" - }, - "DEEPSOMATIC_MAKEEXAMPLES": { - "deepsomatic": "1.7.0" - }, - "DEEPSOMATIC_POSTPROCESSVARIANTS": { - "deepsomatic": "1.7.0" - }, - "DEEPVARIANT_CALLVARIANTS": { - "deepvariant": "1.9.0" - }, - "DEEPVARIANT_MAKEEXAMPLES": { - "deepvariant": "1.9.0" - }, - "DEEPVARIANT_POSTPROCESSVARIANTS": { - "deepvariant": "1.9.0" - }, - "GERMLINE_VEP": { - "ensemblvep": 115.2, - "perl-math-cdf": 0.1, - "tabix": 1.21 - }, - "METAEXTRACT": { - "samtools": 1.21 - }, - "MINIMAP2_ALIGN": { - "minimap2": "2.29-r1283" - }, - "MOSDEPTH": { - "mosdepth": "0.3.11" - }, - "SAMTOOLS_FAIDX": { - "samtools": "1.22.1" - }, - "SAMTOOLS_FLAGSTAT": { - "samtools": "1.22.1" - }, - "SAMTOOLS_IDXSTATS": { - "samtools": "1.22.1" - }, - "SAMTOOLS_INDEX": { - "samtools": "1.22.1" - }, - "SAMTOOLS_STATS": { - "samtools": "1.22.1" - }, - "SEVERUS": { - "severus": 1.6 - }, - "SOMATIC_VEP": { - "ensemblvep": 115.2, - "perl-math-cdf": 0.1, - "tabix": 1.21 - }, - "SV_VEP": { - "ensemblvep": 115.2, - "perl-math-cdf": 0.1, - "tabix": 1.21 - }, - "UNTAR": { - "untar": 1.34 - }, - "UNZIP_FASTA": { - "pigz": 2.8 - }, - "VCFSPLIT": { - "bcftools": 1.2 - }, - "WGET": { - "wget": "1.21.4" - }, - "WHATSHAP_STATS": { - "whatshap": 2.8 - }, - "Workflow": { - "IntGenomicsLab/lrsomatic": "v1.1.0dev" - } - }, + null, [ - "multiqc", - "multiqc/multiqc_data", - "multiqc/multiqc_data/llms-full.txt", - "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", - "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", - "multiqc/multiqc_data/mosdepth_cov_dist.txt", - "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", - "multiqc/multiqc_data/mosdepth_perchrom.txt", - "multiqc/multiqc_data/multiqc.log", - "multiqc/multiqc_data/multiqc.parquet", - "multiqc/multiqc_data/multiqc_citations.txt", - "multiqc/multiqc_data/multiqc_data.json", - 
"multiqc/multiqc_data/multiqc_general_stats.txt", - "multiqc/multiqc_data/multiqc_samtools_flagstat.txt", - "multiqc/multiqc_data/multiqc_samtools_idxstats.txt", - "multiqc/multiqc_data/multiqc_samtools_stats.txt", - "multiqc/multiqc_data/multiqc_software_versions.txt", - "multiqc/multiqc_data/multiqc_sources.txt", - "multiqc/multiqc_data/multiqc_whatshap_phased_bp_plot.txt", - "multiqc/multiqc_data/multiqc_whatshap_stats.txt", - "multiqc/multiqc_data/samtools-flagstat-pct-table.txt", - "multiqc/multiqc_data/samtools-flagstat-table.txt", - "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Normalised_Counts.txt", - "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts.txt", - "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Raw_Counts.txt", - "multiqc/multiqc_data/samtools-stats-dp.txt", - "multiqc/multiqc_data/samtools_alignment_plot.txt", - "multiqc/multiqc_data/whatshap-stats-table.txt", - "multiqc/multiqc_plots", - "multiqc/multiqc_plots/pdf", - "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", - "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", - "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", - "multiqc/multiqc_plots/pdf/multiqc_whatshap_phased_bp_plot.pdf", - "multiqc/multiqc_plots/pdf/samtools-flagstat-pct-table.pdf", - "multiqc/multiqc_plots/pdf/samtools-flagstat-table.pdf", - "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.pdf", - "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Normalised_Counts-log.pdf", - "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-cnt.pdf", - "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-log.pdf", - "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Raw_Counts-cnt.pdf", - "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Raw_Counts-log.pdf", - 
"multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", - "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", - "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", - "multiqc/multiqc_plots/pdf/whatshap-stats-table.pdf", - "multiqc/multiqc_plots/png", - "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", - "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", - "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", - "multiqc/multiqc_plots/png/multiqc_whatshap_phased_bp_plot.png", - "multiqc/multiqc_plots/png/samtools-flagstat-pct-table.png", - "multiqc/multiqc_plots/png/samtools-flagstat-table.png", - "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.png", - "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Normalised_Counts-log.png", - "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-cnt.png", - "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-log.png", - "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Raw_Counts-cnt.png", - "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Raw_Counts-log.png", - "multiqc/multiqc_plots/png/samtools-stats-dp.png", - "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", - "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", - "multiqc/multiqc_plots/png/whatshap-stats-table.png", - "multiqc/multiqc_plots/svg", - "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", - "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", - "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", - "multiqc/multiqc_plots/svg/multiqc_whatshap_phased_bp_plot.svg", - "multiqc/multiqc_plots/svg/samtools-flagstat-pct-table.svg", - "multiqc/multiqc_plots/svg/samtools-flagstat-table.svg", - "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.svg", - 
"multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Normalised_Counts-log.svg", - "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-cnt.svg", - "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-log.svg", - "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Raw_Counts-cnt.svg", - "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Raw_Counts-log.svg", - "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", - "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", - "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", - "multiqc/multiqc_plots/svg/whatshap-stats-table.svg", - "multiqc/multiqc_report.html", - "pipeline_info", - "pipeline_info/lrsomatic_software_mqc_versions.yml", - "sample1", - "sample1/bamfiles", - "sample1/bamfiles/sample1_normal.bam", - "sample1/bamfiles/sample1_normal.bam.bai", - "sample1/bamfiles/sample1_tumor.bam", - "sample1/bamfiles/sample1_tumor.bam.bai", - "sample1/qc", - "sample1/qc/normal", - "sample1/qc/normal/cramino_aln", - "sample1/qc/normal/cramino_aln/sample1_normal_cramino.txt", - "sample1/qc/normal/cramino_ubam_1", - "sample1/qc/normal/cramino_ubam_1/sample1_normal_cramino.txt", - "sample1/qc/normal/mosdepth", - "sample1/qc/normal/mosdepth/sample1.mosdepth.global.dist.txt", - "sample1/qc/normal/mosdepth/sample1.mosdepth.summary.txt", - "sample1/qc/normal/nanoplot_aln", - "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_NanoPlot-report.html", - "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_NanoStats.txt", - "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_Non_weightedHistogramReadlength.html", - "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_Non_weightedLogTransformed_HistogramReadlength.html", - "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", - 
"sample1/qc/normal/nanoplot_aln/sample1_normal_aln_PercentIdentityvsAlignedReadLength_dot.html", - "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_PercentIdentityvsAlignedReadLength_kde.html", - "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_WeightedHistogramReadlength.html", - "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_WeightedLogTransformed_HistogramReadlength.html", - "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_Yield_By_Length.html", - "sample1/qc/normal/nanoplot_ubam_1", - "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_NanoPlot-report.html", - "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_NanoStats.txt", - "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_Non_weightedHistogramReadlength.html", - "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_Non_weightedLogTransformed_HistogramReadlength.html", - "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_WeightedHistogramReadlength.html", - "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_WeightedLogTransformed_HistogramReadlength.html", - "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_Yield_By_Length.html", - "sample1/qc/normal/samtools", - "sample1/qc/normal/samtools/sample1.flagstat", - "sample1/qc/normal/samtools/sample1.idxstats", - "sample1/qc/normal/samtools/sample1.stats", - "sample1/qc/tumor", - "sample1/qc/tumor/cramino_aln", - "sample1/qc/tumor/cramino_aln/sample1_tumor_cramino.txt", - "sample1/qc/tumor/cramino_ubam_1", - "sample1/qc/tumor/cramino_ubam_1/sample1_tumor_cramino.txt", - "sample1/qc/tumor/mosdepth", - "sample1/qc/tumor/mosdepth/sample1.mosdepth.global.dist.txt", - "sample1/qc/tumor/mosdepth/sample1.mosdepth.summary.txt", - "sample1/qc/tumor/nanoplot_aln", - "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_NanoPlot-report.html", - "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_NanoStats.txt", - "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_Non_weightedHistogramReadlength.html", - 
"sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_Non_weightedLogTransformed_HistogramReadlength.html", - "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", - "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_PercentIdentityvsAlignedReadLength_dot.html", - "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_PercentIdentityvsAlignedReadLength_kde.html", - "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_WeightedHistogramReadlength.html", - "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_WeightedLogTransformed_HistogramReadlength.html", - "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_Yield_By_Length.html", - "sample1/qc/tumor/nanoplot_ubam_1", - "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_NanoPlot-report.html", - "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_NanoStats.txt", - "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_Non_weightedHistogramReadlength.html", - "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_Non_weightedLogTransformed_HistogramReadlength.html", - "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_WeightedHistogramReadlength.html", - "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_WeightedLogTransformed_HistogramReadlength.html", - "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_Yield_By_Length.html", - "sample1/qc/tumor/samtools", - "sample1/qc/tumor/samtools/sample1.flagstat", - "sample1/qc/tumor/samtools/sample1.idxstats", - "sample1/qc/tumor/samtools/sample1.stats", - "sample1/qc/whatshap_stats", - "sample1/qc/whatshap_stats/sample1_whatshap_stats.gtf", - "sample1/qc/whatshap_stats/sample1_whatshap_stats.log", - "sample1/qc/whatshap_stats/sample1_whatshap_stats.tsv", - "sample1/variants", - "sample1/variants/clair3", - "sample1/variants/clair3/merge_output.vcf.gz", - "sample1/variants/clair3/merge_output.vcf.gz.tbi", - "sample1/variants/clairs", - "sample1/variants/clairs/indel.vcf.gz", - "sample1/variants/clairs/indel.vcf.gz.tbi", - 
"sample1/variants/clairs/snvs.vcf.gz", - "sample1/variants/clairs/snvs.vcf.gz.tbi", - "sample1/variants/deepsomatic", - "sample1/variants/deepsomatic/sample1.vcf.gz", - "sample1/variants/deepsomatic/sample1.vcf.gz.tbi", - "sample1/variants/deepvariant", - "sample1/variants/deepvariant/sample1.g.vcf.gz", - "sample1/variants/deepvariant/sample1.g.vcf.gz.tbi", - "sample1/variants/deepvariant/sample1.vcf.gz", - "sample1/variants/deepvariant/sample1.vcf.gz.tbi", - "sample1/variants/phased", - "sample1/variants/phased/germline_smallvariants.vcf.gz", - "sample1/variants/phased/germline_smallvariants.vcf.gz.tbi", - "sample1/variants/phased/germline_smallvariants_mod.vcf.gz", - "sample1/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", - "sample1/variants/phased/somatic_smallvariants.vcf.gz", - "sample1/variants/phased/somatic_smallvariants.vcf.gz.tbi", - "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz", - "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", - "sample1/variants/severus", - "sample1/variants/severus/all_SVs", - "sample1/variants/severus/all_SVs/breakpoint_clusters.tsv", - "sample1/variants/severus/all_SVs/breakpoint_clusters_list.tsv", - "sample1/variants/severus/all_SVs/severus_all.vcf.gz", - "sample1/variants/severus/breakpoints_double.csv", - "sample1/variants/severus/read_ids.csv", - "sample1/variants/severus/read_qual.txt", - "sample1/variants/severus/severus.log", - "sample1/variants/severus/somatic_SVs", - "sample1/variants/severus/somatic_SVs/breakpoint_clusters.tsv", - "sample1/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", - "sample1/variants/severus/somatic_SVs/severus_somatic.vcf.gz", - "sample1/vep", - "sample1/vep/SVs", - "sample1/vep/SVs/sample1_SV_VEP.vcf.gz", - "sample1/vep/SVs/sample1_SV_VEP.vcf.gz.tbi", - "sample1/vep/SVs/sample1_SV_VEP.vcf.gz_summary.html", - "sample1/vep/germline", - "sample1/vep/germline/sample1_GERMLINE_VEP.vcf.gz", - "sample1/vep/germline/sample1_GERMLINE_VEP.vcf.gz.tbi", - 
"sample1/vep/germline/sample1_GERMLINE_VEP.vcf.gz_summary.html", - "sample1/vep/somatic", - "sample1/vep/somatic/sample1_SOMATIC_VEP.vcf.gz", - "sample1/vep/somatic/sample1_SOMATIC_VEP.vcf.gz.tbi", - "sample1/vep/somatic/sample1_SOMATIC_VEP.vcf.gz_summary.html", - "sample2", - "sample2/bamfiles", - "sample2/bamfiles/sample2_normal.bam", - "sample2/bamfiles/sample2_normal.bam.bai", - "sample2/bamfiles/sample2_tumor.bam", - "sample2/bamfiles/sample2_tumor.bam.bai", - "sample2/qc", - "sample2/qc/normal", - "sample2/qc/normal/cramino_aln", - "sample2/qc/normal/cramino_aln/sample2_normal_cramino.txt", - "sample2/qc/normal/cramino_ubam_1", - "sample2/qc/normal/cramino_ubam_1/sample2_normal_cramino.txt", - "sample2/qc/normal/mosdepth", - "sample2/qc/normal/mosdepth/sample2.mosdepth.global.dist.txt", - "sample2/qc/normal/mosdepth/sample2.mosdepth.summary.txt", - "sample2/qc/normal/nanoplot_aln", - "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_NanoPlot-report.html", - "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_NanoStats.txt", - "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_Non_weightedHistogramReadlength.html", - "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_Non_weightedLogTransformed_HistogramReadlength.html", - "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", - "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_PercentIdentityvsAlignedReadLength_dot.html", - "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_PercentIdentityvsAlignedReadLength_kde.html", - "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_WeightedHistogramReadlength.html", - "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_WeightedLogTransformed_HistogramReadlength.html", - "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_Yield_By_Length.html", - "sample2/qc/normal/nanoplot_ubam_1", - "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_NanoPlot-report.html", - 
"sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_NanoStats.txt", - "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_Non_weightedHistogramReadlength.html", - "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_Non_weightedLogTransformed_HistogramReadlength.html", - "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_WeightedHistogramReadlength.html", - "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_WeightedLogTransformed_HistogramReadlength.html", - "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_Yield_By_Length.html", - "sample2/qc/normal/samtools", - "sample2/qc/normal/samtools/sample2.flagstat", - "sample2/qc/normal/samtools/sample2.idxstats", - "sample2/qc/normal/samtools/sample2.stats", - "sample2/qc/tumor", - "sample2/qc/tumor/cramino_aln", - "sample2/qc/tumor/cramino_aln/sample2_tumor_cramino.txt", - "sample2/qc/tumor/cramino_ubam_1", - "sample2/qc/tumor/cramino_ubam_1/sample2_tumor_cramino.txt", - "sample2/qc/tumor/mosdepth", - "sample2/qc/tumor/mosdepth/sample2.mosdepth.global.dist.txt", - "sample2/qc/tumor/mosdepth/sample2.mosdepth.summary.txt", - "sample2/qc/tumor/nanoplot_aln", - "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_NanoPlot-report.html", - "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_NanoStats.txt", - "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_Non_weightedHistogramReadlength.html", - "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_Non_weightedLogTransformed_HistogramReadlength.html", - "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", - "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_PercentIdentityvsAlignedReadLength_dot.html", - "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_PercentIdentityvsAlignedReadLength_kde.html", - "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_WeightedHistogramReadlength.html", - "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_WeightedLogTransformed_HistogramReadlength.html", - 
"sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_Yield_By_Length.html", - "sample2/qc/tumor/nanoplot_ubam_1", - "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_NanoPlot-report.html", - "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_NanoStats.txt", - "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_Non_weightedHistogramReadlength.html", - "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_Non_weightedLogTransformed_HistogramReadlength.html", - "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_WeightedHistogramReadlength.html", - "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_WeightedLogTransformed_HistogramReadlength.html", - "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_Yield_By_Length.html", - "sample2/qc/tumor/samtools", - "sample2/qc/tumor/samtools/sample2.flagstat", - "sample2/qc/tumor/samtools/sample2.idxstats", - "sample2/qc/tumor/samtools/sample2.stats", - "sample2/qc/whatshap_stats", - "sample2/qc/whatshap_stats/sample2_whatshap_stats.gtf", - "sample2/qc/whatshap_stats/sample2_whatshap_stats.log", - "sample2/qc/whatshap_stats/sample2_whatshap_stats.tsv", - "sample2/variants", - "sample2/variants/clair3", - "sample2/variants/clair3/merge_output.vcf.gz", - "sample2/variants/clair3/merge_output.vcf.gz.tbi", - "sample2/variants/clairs", - "sample2/variants/clairs/indel.vcf.gz", - "sample2/variants/clairs/indel.vcf.gz.tbi", - "sample2/variants/clairs/snvs.vcf.gz", - "sample2/variants/clairs/snvs.vcf.gz.tbi", - "sample2/variants/deepsomatic", - "sample2/variants/deepsomatic/sample2.vcf.gz", - "sample2/variants/deepsomatic/sample2.vcf.gz.tbi", - "sample2/variants/deepvariant", - "sample2/variants/deepvariant/sample2.g.vcf.gz", - "sample2/variants/deepvariant/sample2.g.vcf.gz.tbi", - "sample2/variants/deepvariant/sample2.vcf.gz", - "sample2/variants/deepvariant/sample2.vcf.gz.tbi", - "sample2/variants/phased", - "sample2/variants/phased/germline_smallvariants.vcf.gz", - "sample2/variants/phased/germline_smallvariants.vcf.gz.tbi", - 
"sample2/variants/phased/germline_smallvariants_mod.vcf.gz", - "sample2/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", - "sample2/variants/phased/somatic_smallvariants.vcf.gz", - "sample2/variants/phased/somatic_smallvariants.vcf.gz.tbi", - "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz", - "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", - "sample2/variants/severus", - "sample2/variants/severus/all_SVs", - "sample2/variants/severus/all_SVs/breakpoint_clusters.tsv", - "sample2/variants/severus/all_SVs/breakpoint_clusters_list.tsv", - "sample2/variants/severus/all_SVs/severus_all.vcf.gz", - "sample2/variants/severus/breakpoints_double.csv", - "sample2/variants/severus/read_ids.csv", - "sample2/variants/severus/read_qual.txt", - "sample2/variants/severus/severus.log", - "sample2/variants/severus/somatic_SVs", - "sample2/variants/severus/somatic_SVs/breakpoint_clusters.tsv", - "sample2/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", - "sample2/variants/severus/somatic_SVs/severus_somatic.vcf.gz", - "sample2/vep", - "sample2/vep/SVs", - "sample2/vep/SVs/sample2_SV_VEP.vcf.gz", - "sample2/vep/SVs/sample2_SV_VEP.vcf.gz.tbi", - "sample2/vep/SVs/sample2_SV_VEP.vcf.gz_summary.html", - "sample2/vep/germline", - "sample2/vep/germline/sample2_GERMLINE_VEP.vcf.gz", - "sample2/vep/germline/sample2_GERMLINE_VEP.vcf.gz.tbi", - "sample2/vep/germline/sample2_GERMLINE_VEP.vcf.gz_summary.html", - "sample2/vep/somatic", - "sample2/vep/somatic/sample2_SOMATIC_VEP.vcf.gz", - "sample2/vep/somatic/sample2_SOMATIC_VEP.vcf.gz.tbi", - "sample2/vep/somatic/sample2_SOMATIC_VEP.vcf.gz_summary.html", - "sample3", - "sample3/bamfiles", - "sample3/bamfiles/sample3_tumor.bam", - "sample3/bamfiles/sample3_tumor.bam.bai", - "sample3/qc", - "sample3/qc/tumor", - "sample3/qc/tumor/cramino_aln", - "sample3/qc/tumor/cramino_aln/sample3_tumor_cramino.txt", - "sample3/qc/tumor/cramino_ubam_1", - "sample3/qc/tumor/cramino_ubam_1/sample3_tumor_cramino.txt", - 
"sample3/qc/tumor/mosdepth", - "sample3/qc/tumor/mosdepth/sample3.mosdepth.global.dist.txt", - "sample3/qc/tumor/mosdepth/sample3.mosdepth.summary.txt", - "sample3/qc/tumor/nanoplot_aln", - "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_NanoPlot-report.html", - "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_NanoStats.txt", - "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_Non_weightedHistogramReadlength.html", - "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_Non_weightedLogTransformed_HistogramReadlength.html", - "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", - "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_PercentIdentityvsAlignedReadLength_dot.html", - "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_PercentIdentityvsAlignedReadLength_kde.html", - "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_WeightedHistogramReadlength.html", - "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_WeightedLogTransformed_HistogramReadlength.html", - "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_Yield_By_Length.html", - "sample3/qc/tumor/nanoplot_ubam_1", - "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_NanoPlot-report.html", - "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_NanoStats.txt", - "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_Non_weightedHistogramReadlength.html", - "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_Non_weightedLogTransformed_HistogramReadlength.html", - "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_WeightedHistogramReadlength.html", - "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_WeightedLogTransformed_HistogramReadlength.html", - "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_Yield_By_Length.html", - "sample3/qc/tumor/samtools", - "sample3/qc/tumor/samtools/sample3.flagstat", - "sample3/qc/tumor/samtools/sample3.idxstats", - "sample3/qc/tumor/samtools/sample3.stats", - "sample3/qc/whatshap_stats", - 
"sample3/qc/whatshap_stats/sample3_whatshap_stats.gtf", - "sample3/qc/whatshap_stats/sample3_whatshap_stats.log", - "sample3/qc/whatshap_stats/sample3_whatshap_stats.tsv", - "sample3/variants", - "sample3/variants/clairsto", - "sample3/variants/clairsto/germline.vcf.gz", - "sample3/variants/clairsto/germline.vcf.gz.tbi", - "sample3/variants/clairsto/indel.vcf.gz", - "sample3/variants/clairsto/indel.vcf.gz.tbi", - "sample3/variants/clairsto/snv.vcf.gz", - "sample3/variants/clairsto/snv.vcf.gz.tbi", - "sample3/variants/clairsto/somatic.vcf.gz", - "sample3/variants/clairsto/somatic.vcf.gz.tbi", - "sample3/variants/deepsomatic", - "sample3/variants/deepsomatic/sample3.vcf.gz", - "sample3/variants/deepsomatic/sample3.vcf.gz.tbi", - "sample3/variants/deepvariant", - "sample3/variants/deepvariant/sample3.g.vcf.gz", - "sample3/variants/deepvariant/sample3.g.vcf.gz.tbi", - "sample3/variants/deepvariant/sample3.vcf.gz", - "sample3/variants/deepvariant/sample3.vcf.gz.tbi", - "sample3/variants/phased", - "sample3/variants/phased/germline_smallvariants.vcf.gz", - "sample3/variants/phased/germline_smallvariants.vcf.gz.tbi", - "sample3/variants/phased/germline_smallvariants_mod.vcf.gz", - "sample3/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", - "sample3/variants/phased/somatic_smallvariants.vcf.gz", - "sample3/variants/phased/somatic_smallvariants.vcf.gz.tbi", - "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz", - "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", - "sample3/variants/severus", - "sample3/variants/severus/all_SVs", - "sample3/variants/severus/all_SVs/breakpoint_clusters.tsv", - "sample3/variants/severus/all_SVs/breakpoint_clusters_list.tsv", - "sample3/variants/severus/all_SVs/severus_all.vcf.gz", - "sample3/variants/severus/breakpoints_double.csv", - "sample3/variants/severus/read_ids.csv", - "sample3/variants/severus/read_qual.txt", - "sample3/variants/severus/severus.log", - "sample3/variants/severus/somatic_SVs", - 
"sample3/variants/severus/somatic_SVs/breakpoint_clusters.tsv", - "sample3/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", - "sample3/variants/severus/somatic_SVs/severus_somatic.vcf.gz", - "sample3/vep", - "sample3/vep/SVs", - "sample3/vep/SVs/sample3_SV_VEP.vcf.gz", - "sample3/vep/SVs/sample3_SV_VEP.vcf.gz.tbi", - "sample3/vep/SVs/sample3_SV_VEP.vcf.gz_summary.html", - "sample3/vep/germline", - "sample3/vep/germline/sample3_GERMLINE_VEP.vcf.gz", - "sample3/vep/germline/sample3_GERMLINE_VEP.vcf.gz.tbi", - "sample3/vep/germline/sample3_GERMLINE_VEP.vcf.gz_summary.html", - "sample3/vep/somatic", - "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz", - "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz.tbi", - "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz_summary.html" + "pipeline_info" ], [ - "sample1_normal.bam:md5,92a00e311e085a34d443cb64694ce839", - "sample1_normal.bam.bai:md5,c7dff8adc4c8d33a81fb8ea7dff4a98e", - "sample1_tumor.bam:md5,c6c79808f928393b23ae53976a7304c3", - "sample1_tumor.bam.bai:md5,317d711c0c26d2cfb933ad53b69da1d7", - "sample1.flagstat:md5,1c41ea9923945501eb7e41f83a90502d", - "sample1.idxstats:md5,902e503387799123ea59255e3fca172c", - "sample1.stats:md5,70fabbdc07dec0479b3fc7dcec344054", - "sample1.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", - "sample1.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", - "sample1.stats:md5,5012c82d3d3ca60ffdd2fb970f772566", - "sample1.vcf.gz:md5,92c0fa9016c3d8b192eb382fd6a81199", - "sample1.vcf.gz.tbi:md5,04e82577dc57f80b9db25897389364b2", - "sample1.g.vcf.gz:md5,5ed06f35ccecf7aadbec54873dc07e64", - "sample1.g.vcf.gz.tbi:md5,ca628ef368d34a7a6c77098a4c4bdf36", - "sample1.vcf.gz:md5,ad971a535d2b8014fabdabe72995a5db", - "sample1.vcf.gz.tbi:md5,65de5caace8d4312afa707c3bfd6fa45", - "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", - "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "breakpoints_double.csv:md5,d63f058075bfe791248954ca2ee6c4fb", - 
"read_qual.txt:md5,78247dfa2ea336eac0e128eba5e9eef4", - "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", - "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "sample2_normal.bam:md5,ed6aab4d2ef70537390a0ce85276ad2c", - "sample2_normal.bam.bai:md5,fb92c167c63e7f62d0c4c3fea8bd1b60", - "sample2_tumor.bam:md5,10c29ced5ed253731ca50097d9c848e3", - "sample2_tumor.bam.bai:md5,e0aa6d8d594070e753145503520fffab", - "sample2.flagstat:md5,714d0cc0c213e2640e54a16f3d0e6e7e", - "sample2.idxstats:md5,72eb83bb11748dc863fef1a0a5497e4b", - "sample2.stats:md5,87cb6e9adf8a133244e8b331be43bb14", - "sample2.flagstat:md5,4344a8745efef9cc2a017024218d61c6", - "sample2.idxstats:md5,69467fc02c83a30084736aeea8b785fb", - "sample2.stats:md5,1e044857eeefb284fda88ee58ff7a04a", - "sample2.vcf.gz:md5,f9d6266ee49c2544dc28eef0cdd9dcae", - "sample2.vcf.gz.tbi:md5,433ecd219947787d00dd0fada6307661", - "sample2.g.vcf.gz:md5,7998d15ce582ec94e86568e3fae654f9", - "sample2.g.vcf.gz.tbi:md5,9498e3266900eee2448ff05b0bce87c0", - "sample2.vcf.gz:md5,33330d8444a1774864883b33e1e2235c", - "sample2.vcf.gz.tbi:md5,384b8b5ea7c1559b464031f8e0863532", - "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", - "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "breakpoints_double.csv:md5,a68d7fc9c7a7cb2f31e73189c5412f7b", - "read_qual.txt:md5,8b92ff7dc4536188be159b95525511cd", - "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", - "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "sample3_tumor.bam:md5,5020a416186da412d9e89f7efac64178", - "sample3_tumor.bam.bai:md5,fc26fd7d9b388e6551898aacf5ed0c7b", - "sample3.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", - "sample3.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", - "sample3.stats:md5,d7a8552a8a41a217954a0c825d468a60", - "sample3.vcf.gz:md5,ee4f7a5bbd471c1370accf888b8262c0", - "sample3.vcf.gz.tbi:md5,944349ec46ecc95bab7db2f6848d0c36", - 
"sample3.g.vcf.gz:md5,64adc407ef2434254e76763b72c67b08", - "sample3.g.vcf.gz.tbi:md5,ac0397f7a840cf618d56f097defba878", - "sample3.vcf.gz:md5,9c52a803d796157a08b565c653d4685c", - "sample3.vcf.gz.tbi:md5,f0b3599e4281a6f3bbb8bc408740e58e", - "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", - "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "breakpoints_double.csv:md5,46e03bf2d67aa736b599f00fe7f01e06", - "read_qual.txt:md5,b918430d35354dad1d7f02f21e4cd4ed", - "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", - "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50" + ] ], "meta": { "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-24T11:55:01.760175088" + "timestamp": "2026-03-24T14:22:30.952285683" } } \ No newline at end of file diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index 35d03626..3e600273 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -96,7 +96,7 @@ workflow LRSOMATIC { params.vep_genome = getGenomeAttribute('vep_genome') params.vep_species = getGenomeAttribute('vep_species') - if (params.pons_vcfs != null) { + if (params.pon_vcfs != null) { pon_files = params.pon_vcfs.collect { file(it) } pon_flags = params.pon_flags } @@ -487,14 +487,14 @@ workflow LRSOMATIC { ) - ch_somatic_vcf + PHASING_HAPLOTYPING.out.phased_somatic_vcf .map { meta, vcf, _tbi -> def extra = [] return [meta, vcf, extra] } .set { somatic_vep } - ch_germline_vcf + PHASING_HAPLOTYPING.out.phased_germline_vcf .map { meta, vcf, _tbi -> def extra = [] return [meta, vcf, extra] From bf44b0ad9223529ff5f37427d3a9519ac2ff0126 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 14:56:07 +0100 Subject: [PATCH 21/36] update snap --- tests/default.nf.test.snap | 614 ++++++++++++++++++++++++++++++++++++- 1 file changed, 610 insertions(+), 4 deletions(-) diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 7f3ef224..6b04accb 100644 --- 
a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -1,18 +1,624 @@ { "-profile test": { "content": [ - null, + { + "BCFTOOLS_ANNOTATE": { + "bcftools": 1.22 + }, + "BCFTOOLS_CONCAT": { + "bcftools": 1.22 + }, + "BCFTOOLS_ISEC": { + "bcftools": 1.22 + }, + "BCFTOOLS_NORM": { + "bcftools": 1.22 + }, + "BCFTOOLS_QUERY": { + "bcftools": 1.22 + }, + "BCFTOOLS_SORT": { + "bcftools": 1.22 + }, + "CLAIR3": { + "clair3": "1.2.0" + }, + "CLAIRS": { + "clairs": "0.4.4" + }, + "CLAIRSTO": { + "clairsto": "0.4.2" + }, + "CRAMINO_POST": { + "cramino": "1.3.0" + }, + "CRAMINO_PRE": { + "cramino": "1.3.0" + }, + "DEEPSOMATIC_CALLVARIANTS": { + "deepsomatic": "1.7.0" + }, + "DEEPSOMATIC_MAKEEXAMPLES": { + "deepsomatic": "1.7.0" + }, + "DEEPSOMATIC_POSTPROCESSVARIANTS": { + "deepsomatic": "1.7.0" + }, + "DEEPVARIANT_CALLVARIANTS": { + "deepvariant": "1.9.0" + }, + "DEEPVARIANT_MAKEEXAMPLES": { + "deepvariant": "1.9.0" + }, + "DEEPVARIANT_POSTPROCESSVARIANTS": { + "deepvariant": "1.9.0" + }, + "GERMLINE_VEP": { + "ensemblvep": 115.2, + "perl-math-cdf": 0.1, + "tabix": 1.21 + }, + "METAEXTRACT": { + "samtools": 1.21 + }, + "MINIMAP2_ALIGN": { + "minimap2": "2.29-r1283" + }, + "MOSDEPTH": { + "mosdepth": "0.3.11" + }, + "SAMTOOLS_FAIDX": { + "samtools": "1.22.1" + }, + "SAMTOOLS_FLAGSTAT": { + "samtools": "1.22.1" + }, + "SAMTOOLS_IDXSTATS": { + "samtools": "1.22.1" + }, + "SAMTOOLS_INDEX": { + "samtools": "1.22.1" + }, + "SAMTOOLS_STATS": { + "samtools": "1.22.1" + }, + "SEVERUS": { + "severus": 1.6 + }, + "SOMATIC_VEP": { + "ensemblvep": 115.2, + "perl-math-cdf": 0.1, + "tabix": 1.21 + }, + "SV_VEP": { + "ensemblvep": 115.2, + "perl-math-cdf": 0.1, + "tabix": 1.21 + }, + "UNTAR": { + "untar": 1.34 + }, + "UNZIP_FASTA": { + "pigz": 2.8 + }, + "VCFSPLIT": { + "bcftools": 1.2 + }, + "WGET": { + "wget": "1.21.4" + }, + "WHATSHAP_STATS": { + "whatshap": 2.8 + }, + "Workflow": { + "IntGenomicsLab/lrsomatic": "v1.1.0dev" + } + }, [ - "pipeline_info" + "multiqc", + 
"multiqc/multiqc_data", + "multiqc/multiqc_data/llms-full.txt", + "multiqc/multiqc_data/mosdepth-coverage-per-contig-single.txt", + "multiqc/multiqc_data/mosdepth-cumcoverage-dist-id.txt", + "multiqc/multiqc_data/mosdepth_cov_dist.txt", + "multiqc/multiqc_data/mosdepth_cumcov_dist.txt", + "multiqc/multiqc_data/mosdepth_perchrom.txt", + "multiqc/multiqc_data/multiqc.log", + "multiqc/multiqc_data/multiqc.parquet", + "multiqc/multiqc_data/multiqc_citations.txt", + "multiqc/multiqc_data/multiqc_data.json", + "multiqc/multiqc_data/multiqc_general_stats.txt", + "multiqc/multiqc_data/multiqc_samtools_flagstat.txt", + "multiqc/multiqc_data/multiqc_samtools_idxstats.txt", + "multiqc/multiqc_data/multiqc_samtools_stats.txt", + "multiqc/multiqc_data/multiqc_software_versions.txt", + "multiqc/multiqc_data/multiqc_sources.txt", + "multiqc/multiqc_data/multiqc_whatshap_phased_bp_plot.txt", + "multiqc/multiqc_data/multiqc_whatshap_stats.txt", + "multiqc/multiqc_data/samtools-flagstat-pct-table.txt", + "multiqc/multiqc_data/samtools-flagstat-table.txt", + "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Normalised_Counts.txt", + "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts.txt", + "multiqc/multiqc_data/samtools-idxstats-mapped-reads-plot_Raw_Counts.txt", + "multiqc/multiqc_data/samtools-stats-dp.txt", + "multiqc/multiqc_data/samtools_alignment_plot.txt", + "multiqc/multiqc_data/whatshap-stats-table.txt", + "multiqc/multiqc_plots", + "multiqc/multiqc_plots/pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-cnt.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-coverage-per-contig-single-pct.pdf", + "multiqc/multiqc_plots/pdf/mosdepth-cumcoverage-dist-id.pdf", + "multiqc/multiqc_plots/pdf/multiqc_whatshap_phased_bp_plot.pdf", + "multiqc/multiqc_plots/pdf/samtools-flagstat-pct-table.pdf", + "multiqc/multiqc_plots/pdf/samtools-flagstat-table.pdf", + 
"multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Normalised_Counts-log.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-log.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Raw_Counts-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools-idxstats-mapped-reads-plot_Raw_Counts-log.pdf", + "multiqc/multiqc_plots/pdf/samtools-stats-dp.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-cnt.pdf", + "multiqc/multiqc_plots/pdf/samtools_alignment_plot-pct.pdf", + "multiqc/multiqc_plots/pdf/whatshap-stats-table.pdf", + "multiqc/multiqc_plots/png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-cnt.png", + "multiqc/multiqc_plots/png/mosdepth-coverage-per-contig-single-pct.png", + "multiqc/multiqc_plots/png/mosdepth-cumcoverage-dist-id.png", + "multiqc/multiqc_plots/png/multiqc_whatshap_phased_bp_plot.png", + "multiqc/multiqc_plots/png/samtools-flagstat-pct-table.png", + "multiqc/multiqc_plots/png/samtools-flagstat-table.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Normalised_Counts-log.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-cnt.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-log.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Raw_Counts-cnt.png", + "multiqc/multiqc_plots/png/samtools-idxstats-mapped-reads-plot_Raw_Counts-log.png", + "multiqc/multiqc_plots/png/samtools-stats-dp.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-cnt.png", + "multiqc/multiqc_plots/png/samtools_alignment_plot-pct.png", + 
"multiqc/multiqc_plots/png/whatshap-stats-table.png", + "multiqc/multiqc_plots/svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-cnt.svg", + "multiqc/multiqc_plots/svg/mosdepth-coverage-per-contig-single-pct.svg", + "multiqc/multiqc_plots/svg/mosdepth-cumcoverage-dist-id.svg", + "multiqc/multiqc_plots/svg/multiqc_whatshap_phased_bp_plot.svg", + "multiqc/multiqc_plots/svg/samtools-flagstat-pct-table.svg", + "multiqc/multiqc_plots/svg/samtools-flagstat-table.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Normalised_Counts-cnt.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Normalised_Counts-log.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-cnt.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Observed_over_Expected_Counts-log.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Raw_Counts-cnt.svg", + "multiqc/multiqc_plots/svg/samtools-idxstats-mapped-reads-plot_Raw_Counts-log.svg", + "multiqc/multiqc_plots/svg/samtools-stats-dp.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-cnt.svg", + "multiqc/multiqc_plots/svg/samtools_alignment_plot-pct.svg", + "multiqc/multiqc_plots/svg/whatshap-stats-table.svg", + "multiqc/multiqc_report.html", + "pipeline_info", + "pipeline_info/lrsomatic_software_mqc_versions.yml", + "sample1", + "sample1/bamfiles", + "sample1/bamfiles/sample1_normal.bam", + "sample1/bamfiles/sample1_normal.bam.bai", + "sample1/bamfiles/sample1_tumor.bam", + "sample1/bamfiles/sample1_tumor.bam.bai", + "sample1/qc", + "sample1/qc/normal", + "sample1/qc/normal/cramino_aln", + "sample1/qc/normal/cramino_aln/sample1_normal_cramino.txt", + "sample1/qc/normal/cramino_ubam_1", + "sample1/qc/normal/cramino_ubam_1/sample1_normal_cramino.txt", + "sample1/qc/normal/mosdepth", + "sample1/qc/normal/mosdepth/sample1.mosdepth.global.dist.txt", + "sample1/qc/normal/mosdepth/sample1.mosdepth.summary.txt", 
+ "sample1/qc/normal/nanoplot_aln", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_NanoPlot-report.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_NanoStats.txt", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_Non_weightedHistogramReadlength.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_Non_weightedLogTransformed_HistogramReadlength.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_PercentIdentityvsAlignedReadLength_dot.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_PercentIdentityvsAlignedReadLength_kde.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_WeightedHistogramReadlength.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_WeightedLogTransformed_HistogramReadlength.html", + "sample1/qc/normal/nanoplot_aln/sample1_normal_aln_Yield_By_Length.html", + "sample1/qc/normal/nanoplot_ubam_1", + "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_NanoPlot-report.html", + "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_NanoStats.txt", + "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_Non_weightedHistogramReadlength.html", + "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_Non_weightedLogTransformed_HistogramReadlength.html", + "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_WeightedHistogramReadlength.html", + "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_WeightedLogTransformed_HistogramReadlength.html", + "sample1/qc/normal/nanoplot_ubam_1/sample1_normal_ubam_Yield_By_Length.html", + "sample1/qc/normal/samtools", + "sample1/qc/normal/samtools/sample1.flagstat", + "sample1/qc/normal/samtools/sample1.idxstats", + "sample1/qc/normal/samtools/sample1.stats", + "sample1/qc/tumor", + "sample1/qc/tumor/cramino_aln", + "sample1/qc/tumor/cramino_aln/sample1_tumor_cramino.txt", + "sample1/qc/tumor/cramino_ubam_1", + 
"sample1/qc/tumor/cramino_ubam_1/sample1_tumor_cramino.txt", + "sample1/qc/tumor/mosdepth", + "sample1/qc/tumor/mosdepth/sample1.mosdepth.global.dist.txt", + "sample1/qc/tumor/mosdepth/sample1.mosdepth.summary.txt", + "sample1/qc/tumor/nanoplot_aln", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_NanoPlot-report.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_NanoStats.txt", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_Non_weightedHistogramReadlength.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_Non_weightedLogTransformed_HistogramReadlength.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_PercentIdentityvsAlignedReadLength_dot.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_PercentIdentityvsAlignedReadLength_kde.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_WeightedHistogramReadlength.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_WeightedLogTransformed_HistogramReadlength.html", + "sample1/qc/tumor/nanoplot_aln/sample1_tumor_aln_Yield_By_Length.html", + "sample1/qc/tumor/nanoplot_ubam_1", + "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_NanoPlot-report.html", + "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_NanoStats.txt", + "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_Non_weightedHistogramReadlength.html", + "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_Non_weightedLogTransformed_HistogramReadlength.html", + "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_WeightedHistogramReadlength.html", + "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_WeightedLogTransformed_HistogramReadlength.html", + "sample1/qc/tumor/nanoplot_ubam_1/sample1_tumor_ubam_Yield_By_Length.html", + "sample1/qc/tumor/samtools", + "sample1/qc/tumor/samtools/sample1.flagstat", + "sample1/qc/tumor/samtools/sample1.idxstats", + "sample1/qc/tumor/samtools/sample1.stats", + 
"sample1/qc/whatshap_stats", + "sample1/qc/whatshap_stats/sample1_whatshap_stats.gtf", + "sample1/qc/whatshap_stats/sample1_whatshap_stats.log", + "sample1/qc/whatshap_stats/sample1_whatshap_stats.tsv", + "sample1/variants", + "sample1/variants/clair3", + "sample1/variants/clair3/merge_output.vcf.gz", + "sample1/variants/clair3/merge_output.vcf.gz.tbi", + "sample1/variants/clairs", + "sample1/variants/clairs/indel.vcf.gz", + "sample1/variants/clairs/indel.vcf.gz.tbi", + "sample1/variants/clairs/snvs.vcf.gz", + "sample1/variants/clairs/snvs.vcf.gz.tbi", + "sample1/variants/deepsomatic", + "sample1/variants/deepsomatic/sample1.vcf.gz", + "sample1/variants/deepsomatic/sample1.vcf.gz.tbi", + "sample1/variants/deepvariant", + "sample1/variants/deepvariant/sample1.g.vcf.gz", + "sample1/variants/deepvariant/sample1.g.vcf.gz.tbi", + "sample1/variants/deepvariant/sample1.vcf.gz", + "sample1/variants/deepvariant/sample1.vcf.gz.tbi", + "sample1/variants/phased", + "sample1/variants/phased/germline_smallvariants.vcf.gz", + "sample1/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample1/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample1/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", + "sample1/variants/phased/somatic_smallvariants.vcf.gz", + "sample1/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", + "sample1/variants/severus", + "sample1/variants/severus/all_SVs", + "sample1/variants/severus/all_SVs/breakpoint_clusters.tsv", + "sample1/variants/severus/all_SVs/breakpoint_clusters_list.tsv", + "sample1/variants/severus/all_SVs/severus_all.vcf.gz", + "sample1/variants/severus/breakpoints_double.csv", + "sample1/variants/severus/read_ids.csv", + "sample1/variants/severus/read_qual.txt", + "sample1/variants/severus/severus.log", + "sample1/variants/severus/somatic_SVs", + 
"sample1/variants/severus/somatic_SVs/breakpoint_clusters.tsv", + "sample1/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", + "sample1/variants/severus/somatic_SVs/severus_somatic.vcf.gz", + "sample1/vep", + "sample1/vep/SVs", + "sample1/vep/SVs/sample1_SV_VEP.vcf.gz", + "sample1/vep/SVs/sample1_SV_VEP.vcf.gz.tbi", + "sample1/vep/SVs/sample1_SV_VEP.vcf.gz_summary.html", + "sample1/vep/germline", + "sample1/vep/germline/sample1_GERMLINE_VEP.vcf.gz", + "sample1/vep/germline/sample1_GERMLINE_VEP.vcf.gz.tbi", + "sample1/vep/germline/sample1_GERMLINE_VEP.vcf.gz_summary.html", + "sample1/vep/somatic", + "sample1/vep/somatic/sample1_SOMATIC_VEP.vcf.gz", + "sample1/vep/somatic/sample1_SOMATIC_VEP.vcf.gz.tbi", + "sample1/vep/somatic/sample1_SOMATIC_VEP.vcf.gz_summary.html", + "sample2", + "sample2/bamfiles", + "sample2/bamfiles/sample2_normal.bam", + "sample2/bamfiles/sample2_normal.bam.bai", + "sample2/bamfiles/sample2_tumor.bam", + "sample2/bamfiles/sample2_tumor.bam.bai", + "sample2/qc", + "sample2/qc/normal", + "sample2/qc/normal/cramino_aln", + "sample2/qc/normal/cramino_aln/sample2_normal_cramino.txt", + "sample2/qc/normal/cramino_ubam_1", + "sample2/qc/normal/cramino_ubam_1/sample2_normal_cramino.txt", + "sample2/qc/normal/mosdepth", + "sample2/qc/normal/mosdepth/sample2.mosdepth.global.dist.txt", + "sample2/qc/normal/mosdepth/sample2.mosdepth.summary.txt", + "sample2/qc/normal/nanoplot_aln", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_NanoPlot-report.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_NanoStats.txt", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_Non_weightedHistogramReadlength.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_Non_weightedLogTransformed_HistogramReadlength.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_PercentIdentityvsAlignedReadLength_dot.html", + 
"sample2/qc/normal/nanoplot_aln/sample2_normal_aln_PercentIdentityvsAlignedReadLength_kde.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_WeightedHistogramReadlength.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_WeightedLogTransformed_HistogramReadlength.html", + "sample2/qc/normal/nanoplot_aln/sample2_normal_aln_Yield_By_Length.html", + "sample2/qc/normal/nanoplot_ubam_1", + "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_NanoPlot-report.html", + "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_NanoStats.txt", + "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_Non_weightedHistogramReadlength.html", + "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_Non_weightedLogTransformed_HistogramReadlength.html", + "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_WeightedHistogramReadlength.html", + "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_WeightedLogTransformed_HistogramReadlength.html", + "sample2/qc/normal/nanoplot_ubam_1/sample2_normal_ubam_Yield_By_Length.html", + "sample2/qc/normal/samtools", + "sample2/qc/normal/samtools/sample2.flagstat", + "sample2/qc/normal/samtools/sample2.idxstats", + "sample2/qc/normal/samtools/sample2.stats", + "sample2/qc/tumor", + "sample2/qc/tumor/cramino_aln", + "sample2/qc/tumor/cramino_aln/sample2_tumor_cramino.txt", + "sample2/qc/tumor/cramino_ubam_1", + "sample2/qc/tumor/cramino_ubam_1/sample2_tumor_cramino.txt", + "sample2/qc/tumor/mosdepth", + "sample2/qc/tumor/mosdepth/sample2.mosdepth.global.dist.txt", + "sample2/qc/tumor/mosdepth/sample2.mosdepth.summary.txt", + "sample2/qc/tumor/nanoplot_aln", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_NanoPlot-report.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_NanoStats.txt", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_Non_weightedHistogramReadlength.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_Non_weightedLogTransformed_HistogramReadlength.html", + 
"sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_PercentIdentityvsAlignedReadLength_dot.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_PercentIdentityvsAlignedReadLength_kde.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_WeightedHistogramReadlength.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_WeightedLogTransformed_HistogramReadlength.html", + "sample2/qc/tumor/nanoplot_aln/sample2_tumor_aln_Yield_By_Length.html", + "sample2/qc/tumor/nanoplot_ubam_1", + "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_NanoPlot-report.html", + "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_NanoStats.txt", + "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_Non_weightedHistogramReadlength.html", + "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_Non_weightedLogTransformed_HistogramReadlength.html", + "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_WeightedHistogramReadlength.html", + "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_WeightedLogTransformed_HistogramReadlength.html", + "sample2/qc/tumor/nanoplot_ubam_1/sample2_tumor_ubam_Yield_By_Length.html", + "sample2/qc/tumor/samtools", + "sample2/qc/tumor/samtools/sample2.flagstat", + "sample2/qc/tumor/samtools/sample2.idxstats", + "sample2/qc/tumor/samtools/sample2.stats", + "sample2/qc/whatshap_stats", + "sample2/qc/whatshap_stats/sample2_whatshap_stats.gtf", + "sample2/qc/whatshap_stats/sample2_whatshap_stats.log", + "sample2/qc/whatshap_stats/sample2_whatshap_stats.tsv", + "sample2/variants", + "sample2/variants/clair3", + "sample2/variants/clair3/merge_output.vcf.gz", + "sample2/variants/clair3/merge_output.vcf.gz.tbi", + "sample2/variants/clairs", + "sample2/variants/clairs/indel.vcf.gz", + "sample2/variants/clairs/indel.vcf.gz.tbi", + "sample2/variants/clairs/snvs.vcf.gz", + "sample2/variants/clairs/snvs.vcf.gz.tbi", + "sample2/variants/deepsomatic", 
+ "sample2/variants/deepsomatic/sample2.vcf.gz", + "sample2/variants/deepsomatic/sample2.vcf.gz.tbi", + "sample2/variants/deepvariant", + "sample2/variants/deepvariant/sample2.g.vcf.gz", + "sample2/variants/deepvariant/sample2.g.vcf.gz.tbi", + "sample2/variants/deepvariant/sample2.vcf.gz", + "sample2/variants/deepvariant/sample2.vcf.gz.tbi", + "sample2/variants/phased", + "sample2/variants/phased/germline_smallvariants.vcf.gz", + "sample2/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample2/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample2/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", + "sample2/variants/phased/somatic_smallvariants.vcf.gz", + "sample2/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", + "sample2/variants/severus", + "sample2/variants/severus/all_SVs", + "sample2/variants/severus/all_SVs/breakpoint_clusters.tsv", + "sample2/variants/severus/all_SVs/breakpoint_clusters_list.tsv", + "sample2/variants/severus/all_SVs/severus_all.vcf.gz", + "sample2/variants/severus/breakpoints_double.csv", + "sample2/variants/severus/read_ids.csv", + "sample2/variants/severus/read_qual.txt", + "sample2/variants/severus/severus.log", + "sample2/variants/severus/somatic_SVs", + "sample2/variants/severus/somatic_SVs/breakpoint_clusters.tsv", + "sample2/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", + "sample2/variants/severus/somatic_SVs/severus_somatic.vcf.gz", + "sample2/vep", + "sample2/vep/SVs", + "sample2/vep/SVs/sample2_SV_VEP.vcf.gz", + "sample2/vep/SVs/sample2_SV_VEP.vcf.gz.tbi", + "sample2/vep/SVs/sample2_SV_VEP.vcf.gz_summary.html", + "sample2/vep/germline", + "sample2/vep/germline/sample2_GERMLINE_VEP.vcf.gz", + "sample2/vep/germline/sample2_GERMLINE_VEP.vcf.gz.tbi", + "sample2/vep/germline/sample2_GERMLINE_VEP.vcf.gz_summary.html", + "sample2/vep/somatic", + 
"sample2/vep/somatic/sample2_SOMATIC_VEP.vcf.gz", + "sample2/vep/somatic/sample2_SOMATIC_VEP.vcf.gz.tbi", + "sample2/vep/somatic/sample2_SOMATIC_VEP.vcf.gz_summary.html", + "sample3", + "sample3/bamfiles", + "sample3/bamfiles/sample3_tumor.bam", + "sample3/bamfiles/sample3_tumor.bam.bai", + "sample3/qc", + "sample3/qc/tumor", + "sample3/qc/tumor/cramino_aln", + "sample3/qc/tumor/cramino_aln/sample3_tumor_cramino.txt", + "sample3/qc/tumor/cramino_ubam_1", + "sample3/qc/tumor/cramino_ubam_1/sample3_tumor_cramino.txt", + "sample3/qc/tumor/mosdepth", + "sample3/qc/tumor/mosdepth/sample3.mosdepth.global.dist.txt", + "sample3/qc/tumor/mosdepth/sample3.mosdepth.summary.txt", + "sample3/qc/tumor/nanoplot_aln", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_NanoPlot-report.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_NanoStats.txt", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_Non_weightedHistogramReadlength.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_Non_weightedLogTransformed_HistogramReadlength.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_PercentIdentityHistogramDynamic_Histogram_percent_identity.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_PercentIdentityvsAlignedReadLength_dot.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_PercentIdentityvsAlignedReadLength_kde.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_WeightedHistogramReadlength.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_WeightedLogTransformed_HistogramReadlength.html", + "sample3/qc/tumor/nanoplot_aln/sample3_tumor_aln_Yield_By_Length.html", + "sample3/qc/tumor/nanoplot_ubam_1", + "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_NanoPlot-report.html", + "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_NanoStats.txt", + "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_Non_weightedHistogramReadlength.html", + 
"sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_Non_weightedLogTransformed_HistogramReadlength.html", + "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_WeightedHistogramReadlength.html", + "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_WeightedLogTransformed_HistogramReadlength.html", + "sample3/qc/tumor/nanoplot_ubam_1/sample3_tumor_ubam_Yield_By_Length.html", + "sample3/qc/tumor/samtools", + "sample3/qc/tumor/samtools/sample3.flagstat", + "sample3/qc/tumor/samtools/sample3.idxstats", + "sample3/qc/tumor/samtools/sample3.stats", + "sample3/qc/whatshap_stats", + "sample3/qc/whatshap_stats/sample3_whatshap_stats.gtf", + "sample3/qc/whatshap_stats/sample3_whatshap_stats.log", + "sample3/qc/whatshap_stats/sample3_whatshap_stats.tsv", + "sample3/variants", + "sample3/variants/clairsto", + "sample3/variants/clairsto/germline.vcf.gz", + "sample3/variants/clairsto/germline.vcf.gz.tbi", + "sample3/variants/clairsto/indel.vcf.gz", + "sample3/variants/clairsto/indel.vcf.gz.tbi", + "sample3/variants/clairsto/snv.vcf.gz", + "sample3/variants/clairsto/snv.vcf.gz.tbi", + "sample3/variants/clairsto/somatic.vcf.gz", + "sample3/variants/clairsto/somatic.vcf.gz.tbi", + "sample3/variants/deepsomatic", + "sample3/variants/deepsomatic/sample3.vcf.gz", + "sample3/variants/deepsomatic/sample3.vcf.gz.tbi", + "sample3/variants/deepvariant", + "sample3/variants/deepvariant/sample3.g.vcf.gz", + "sample3/variants/deepvariant/sample3.g.vcf.gz.tbi", + "sample3/variants/deepvariant/sample3.vcf.gz", + "sample3/variants/deepvariant/sample3.vcf.gz.tbi", + "sample3/variants/phased", + "sample3/variants/phased/germline_smallvariants.vcf.gz", + "sample3/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample3/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample3/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", + "sample3/variants/phased/somatic_smallvariants.vcf.gz", + "sample3/variants/phased/somatic_smallvariants.vcf.gz.tbi", + 
"sample3/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", + "sample3/variants/severus", + "sample3/variants/severus/all_SVs", + "sample3/variants/severus/all_SVs/breakpoint_clusters.tsv", + "sample3/variants/severus/all_SVs/breakpoint_clusters_list.tsv", + "sample3/variants/severus/all_SVs/severus_all.vcf.gz", + "sample3/variants/severus/breakpoints_double.csv", + "sample3/variants/severus/read_ids.csv", + "sample3/variants/severus/read_qual.txt", + "sample3/variants/severus/severus.log", + "sample3/variants/severus/somatic_SVs", + "sample3/variants/severus/somatic_SVs/breakpoint_clusters.tsv", + "sample3/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", + "sample3/variants/severus/somatic_SVs/severus_somatic.vcf.gz", + "sample3/vep", + "sample3/vep/SVs", + "sample3/vep/SVs/sample3_SV_VEP.vcf.gz", + "sample3/vep/SVs/sample3_SV_VEP.vcf.gz.tbi", + "sample3/vep/SVs/sample3_SV_VEP.vcf.gz_summary.html", + "sample3/vep/germline", + "sample3/vep/germline/sample3_GERMLINE_VEP.vcf.gz", + "sample3/vep/germline/sample3_GERMLINE_VEP.vcf.gz.tbi", + "sample3/vep/germline/sample3_GERMLINE_VEP.vcf.gz_summary.html", + "sample3/vep/somatic", + "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz", + "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz.tbi", + "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz_summary.html" ], [ - + "sample1_normal.bam:md5,92a00e311e085a34d443cb64694ce839", + "sample1_normal.bam.bai:md5,c7dff8adc4c8d33a81fb8ea7dff4a98e", + "sample1_tumor.bam:md5,c6c79808f928393b23ae53976a7304c3", + "sample1_tumor.bam.bai:md5,317d711c0c26d2cfb933ad53b69da1d7", + "sample1.flagstat:md5,1c41ea9923945501eb7e41f83a90502d", + "sample1.idxstats:md5,902e503387799123ea59255e3fca172c", + "sample1.stats:md5,70fabbdc07dec0479b3fc7dcec344054", + "sample1.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", + "sample1.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", + "sample1.stats:md5,5012c82d3d3ca60ffdd2fb970f772566", + 
"sample1_whatshap_stats.gtf:md5,32e249c78790982098e4b0a606171d69", + "sample1_whatshap_stats.log:md5,407b5fcdfbeec1830b2ed6f65f1c2c18", + "sample1_whatshap_stats.tsv:md5,41290e994b5e6dab5ed696925cbb0716", + "sample1.vcf.gz:md5,92c0fa9016c3d8b192eb382fd6a81199", + "sample1.vcf.gz.tbi:md5,04e82577dc57f80b9db25897389364b2", + "sample1.g.vcf.gz:md5,5ed06f35ccecf7aadbec54873dc07e64", + "sample1.g.vcf.gz.tbi:md5,ca628ef368d34a7a6c77098a4c4bdf36", + "sample1.vcf.gz:md5,ad971a535d2b8014fabdabe72995a5db", + "sample1.vcf.gz.tbi:md5,65de5caace8d4312afa707c3bfd6fa45", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "breakpoints_double.csv:md5,d63f058075bfe791248954ca2ee6c4fb", + "read_qual.txt:md5,78247dfa2ea336eac0e128eba5e9eef4", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "sample2_normal.bam:md5,ed6aab4d2ef70537390a0ce85276ad2c", + "sample2_normal.bam.bai:md5,fb92c167c63e7f62d0c4c3fea8bd1b60", + "sample2_tumor.bam:md5,10c29ced5ed253731ca50097d9c848e3", + "sample2_tumor.bam.bai:md5,e0aa6d8d594070e753145503520fffab", + "sample2.flagstat:md5,714d0cc0c213e2640e54a16f3d0e6e7e", + "sample2.idxstats:md5,72eb83bb11748dc863fef1a0a5497e4b", + "sample2.stats:md5,87cb6e9adf8a133244e8b331be43bb14", + "sample2.flagstat:md5,4344a8745efef9cc2a017024218d61c6", + "sample2.idxstats:md5,69467fc02c83a30084736aeea8b785fb", + "sample2.stats:md5,1e044857eeefb284fda88ee58ff7a04a", + "sample2_whatshap_stats.gtf:md5,af33281699a1d0da83fbe7eaff198d03", + "sample2_whatshap_stats.log:md5,8f5f400786f32871c16e523d9e236fc4", + "sample2_whatshap_stats.tsv:md5,e8b67840491b7d092ac3d5d91db0ff46", + "sample2.vcf.gz:md5,f9d6266ee49c2544dc28eef0cdd9dcae", + "sample2.vcf.gz.tbi:md5,433ecd219947787d00dd0fada6307661", + "sample2.g.vcf.gz:md5,7998d15ce582ec94e86568e3fae654f9", + 
"sample2.g.vcf.gz.tbi:md5,9498e3266900eee2448ff05b0bce87c0", + "sample2.vcf.gz:md5,33330d8444a1774864883b33e1e2235c", + "sample2.vcf.gz.tbi:md5,384b8b5ea7c1559b464031f8e0863532", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "breakpoints_double.csv:md5,a68d7fc9c7a7cb2f31e73189c5412f7b", + "read_qual.txt:md5,8b92ff7dc4536188be159b95525511cd", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "sample3_tumor.bam:md5,5020a416186da412d9e89f7efac64178", + "sample3_tumor.bam.bai:md5,fc26fd7d9b388e6551898aacf5ed0c7b", + "sample3.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", + "sample3.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", + "sample3.stats:md5,d7a8552a8a41a217954a0c825d468a60", + "sample3_whatshap_stats.gtf:md5,415b20e0cc30409d24501d64b185dc49", + "sample3_whatshap_stats.log:md5,99a842c8f8f3259ec66b68e8fe0345e6", + "sample3_whatshap_stats.tsv:md5,a65d179e31756ae4127f0bf74da7e701", + "sample3.vcf.gz:md5,ee4f7a5bbd471c1370accf888b8262c0", + "sample3.vcf.gz.tbi:md5,944349ec46ecc95bab7db2f6848d0c36", + "sample3.g.vcf.gz:md5,64adc407ef2434254e76763b72c67b08", + "sample3.g.vcf.gz.tbi:md5,ac0397f7a840cf618d56f097defba878", + "sample3.vcf.gz:md5,9c52a803d796157a08b565c653d4685c", + "sample3.vcf.gz.tbi:md5,f0b3599e4281a6f3bbb8bc408740e58e", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "breakpoints_double.csv:md5,46e03bf2d67aa736b599f00fe7f01e06", + "read_qual.txt:md5,b918430d35354dad1d7f02f21e4cd4ed", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50" ] ], "meta": { "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-24T14:22:30.952285683" + "timestamp": "2026-03-24T14:54:34.678333789" } } \ No 
newline at end of file From e5d8ae6172d9d5e763ee99591a65fe237a0f5f9b Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 14:58:16 +0100 Subject: [PATCH 22/36] prettier --- modules.json | 156 +++++++++++++-------------------------------------- 1 file changed, 39 insertions(+), 117 deletions(-) diff --git a/modules.json b/modules.json index 8db781ed..b774fa63 100644 --- a/modules.json +++ b/modules.json @@ -8,248 +8,180 @@ "ascat": { "branch": "master", "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/ascat/ascat.diff" }, "bcftools/annotate": { "branch": "master", "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/concat": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", - "installed_by": [ - "modules", - "vcf_gather_bcftools" - ] + "installed_by": ["modules", "vcf_gather_bcftools"] }, "bcftools/isec": { "branch": "master", "git_sha": "3b2c3559699a7bca6a7c2b220695a072e030e17d", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/isec/bcftools-isec.diff" }, "bcftools/merge": { "branch": "master", "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/merge/bcftools-merge.diff" }, "bcftools/norm": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "bcftools/query": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/bcftools/query/bcftools-query.diff" }, "bcftools/sort": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", - "installed_by": [ - "modules", - 
"vcf_gather_bcftools" - ], + "installed_by": ["modules", "vcf_gather_bcftools"], "patch": "modules/nf-core/bcftools/sort/bcftools-sort.diff" }, "deepvariant/callvariants": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": [ - "deepvariant" - ], + "installed_by": ["deepvariant"], "patch": "modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff" }, "deepvariant/makeexamples": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": [ - "deepvariant" - ], + "installed_by": ["deepvariant"], "patch": "modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff" }, "deepvariant/postprocessvariants": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": [ - "deepvariant" - ], + "installed_by": ["deepvariant"], "patch": "modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff" }, "ensemblvep/download": { "branch": "master", "git_sha": "90cdd21fd96ccbdb3bc90797ca69570d18391055", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "ensemblvep/vep": { "branch": "master", "git_sha": "890fdcff71928fc1470d3e669d4c430c8c770297", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/ensemblvep/vep/ensemblvep-vep.diff" }, "longphase/haplotag": { "branch": "master", "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "longphase/phase": { "branch": "master", "git_sha": "47983538e45e539f783ed8ab0d1c96d39df2af8f", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/longphase/phase/longphase-phase.diff" }, "minimap2/align": { "branch": "master", "git_sha": "5c9f8d5b7671237c906abadc9ff732b301ca15ca", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/minimap2/align/minimap2-align.diff" }, 
"minimap2/index": { "branch": "master", "git_sha": "14980f759266eec42dac401fcafeb83d6c957b41", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "modkit/pileup": { "branch": "master", "git_sha": "3d81317a30d1016b533982d6b84df07713ae520a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "mosdepth": { "branch": "master", "git_sha": "6832b69ef7f98c54876d6436360b6b945370c615", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "multiqc": { "branch": "master", "git_sha": "2c73cc8fa92cf48de3da0b643fdf357a8a290b36", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "nanoplot": { "branch": "master", "git_sha": "682f789f93070bd047868300dd018faf3d434e7c", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "pigz/uncompress": { "branch": "master", "git_sha": "f84336b7fa91a65aa61d215b8c109fbb8e4b4ac6", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/cat": { "branch": "master", "git_sha": "f9edc59be2fe25bb6fc73ca4dfc0d28246f2a2d6", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/faidx": { "branch": "master", "git_sha": "b2e78932ef01165fd85829513eaca29eff8e640a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/flagstat": { "branch": "master", "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", - "installed_by": [ - "bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools"] }, "samtools/idxstats": { "branch": "master", "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", - "installed_by": [ - "bam_stats_samtools" - ] + "installed_by": ["bam_stats_samtools"] }, "samtools/index": { "branch": "master", "git_sha": "1d2fbdcbca677bbe8da0f9d0d2bb7c02f2cab1c9", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "samtools/stats": { "branch": "master", "git_sha": "fe93fde0845f907fc91ad7cc7d797930408824df", - "installed_by": [ - "bam_stats_samtools" - ], + "installed_by": 
["bam_stats_samtools"], "patch": "modules/nf-core/samtools/stats/samtools-stats.diff" }, "severus": { "branch": "master", "git_sha": "4dd9d8439a429c7ee566e0e2347f76ddeef27e66", - "installed_by": [ - "modules" - ], + "installed_by": ["modules"], "patch": "modules/nf-core/severus/severus.diff" }, "untar": { "branch": "master", "git_sha": "447f7bc0fa41dfc2400c8cad4c0291880dc060cf", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "unzip": { "branch": "master", "git_sha": "4dd9d8439a429c7ee566e0e2347f76ddeef27e66", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "wget": { "branch": "master", "git_sha": "41dfa3f7c0ffabb96a6a813fe321c6d1cc5b6e46", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] }, "whatshap/stats": { "branch": "master", "git_sha": "bfab71f4d68c1aaff09335a3433e7b2836918b2a", - "installed_by": [ - "modules" - ] + "installed_by": ["modules"] } } }, @@ -258,41 +190,31 @@ "bam_stats_samtools": { "branch": "master", "git_sha": "7ac6cbe7c17c2dad685da7f70496c8f48ea48687", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "deepvariant": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": [ - "subworkflows" - ], + "installed_by": ["subworkflows"], "patch": "subworkflows/nf-core/deepvariant/deepvariant.diff" }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfcore_pipeline": { "branch": "master", "git_sha": "65f5e638d901a51534c68fd5c1c19e8112fb4df1", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] }, "utils_nfschema_plugin": { "branch": "master", "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", - "installed_by": [ - "subworkflows" - ] + "installed_by": ["subworkflows"] } } } } } -} \ No newline at end of file +} From 69a8edd6fa75134af7f661b0b2b1bfbd74cdee22 Mon 
Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 15:02:52 +0100 Subject: [PATCH 23/36] repatch isec --- modules/nf-core/bcftools/isec/bcftools-isec.diff | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/modules/nf-core/bcftools/isec/bcftools-isec.diff b/modules/nf-core/bcftools/isec/bcftools-isec.diff index 36911c50..bea9e9d9 100644 --- a/modules/nf-core/bcftools/isec/bcftools-isec.diff +++ b/modules/nf-core/bcftools/isec/bcftools-isec.diff @@ -28,6 +28,16 @@ Changes in 'bcftools/isec/main.nf': """ stub: +@@ -43,5 +52,9 @@ + touch ${prefix}/0000.vcf.gz.tbi + echo "" | gzip > ${prefix}/0001.vcf.gz + touch ${prefix}/0001.vcf.gz.tbi ++ echo "" | gzip > ${prefix}/0002.vcf.gz ++ touch ${prefix}/0002.vcf.gz.tbi ++ echo "" | gzip > ${prefix}/0003.vcf.gz ++ touch ${prefix}/0003.vcf.gz.tbi + """ + } 'modules/nf-core/bcftools/isec/environment.yml' is unchanged 'modules/nf-core/bcftools/isec/tests/main.nf.test' is unchanged From 3e6607891f51af96b7be579dab73b7da0ef22d5f Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 15:07:49 +0100 Subject: [PATCH 24/36] repatch bcftools query --- modules/nf-core/bcftools/query/bcftools-query.diff | 3 ++- modules/nf-core/bcftools/query/main.nf | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/nf-core/bcftools/query/bcftools-query.diff b/modules/nf-core/bcftools/query/bcftools-query.diff index 6ad99032..790c3808 100644 --- a/modules/nf-core/bcftools/query/bcftools-query.diff +++ b/modules/nf-core/bcftools/query/bcftools-query.diff @@ -13,7 +13,7 @@ Changes in 'bcftools/query/main.nf': tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools when: -@@ -35,12 +36,16 @@ +@@ -35,12 +36,17 @@ ${args} \\ ${vcf} \\ > ${prefix}.${suffix} @@ -28,6 +28,7 @@ Changes in 'bcftools/query/main.nf': - touch ${prefix}.${suffix} \\ + touch ${prefix}.${suffix} + bgzip -c ${prefix}.${suffix} > 
${prefix}.${suffix}.gz ++ touch ${prefix}.${suffix}.gz.tbi + tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz """ } diff --git a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf index 168a2ad9..4d2da568 100644 --- a/modules/nf-core/bcftools/query/main.nf +++ b/modules/nf-core/bcftools/query/main.nf @@ -46,6 +46,7 @@ process BCFTOOLS_QUERY { """ touch ${prefix}.${suffix} bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz + touch ${prefix}.${suffix}.gz.tbi tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz """ } From bcb68a1bf341e77c8a228cb77cb80dc1ac188d22 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 15:48:36 +0100 Subject: [PATCH 25/36] fixed co-pilot fix --- modules/local/vcfsplit/main.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/vcfsplit/main.nf b/modules/local/vcfsplit/main.nf index 9f5759d8..f6156d34 100644 --- a/modules/local/vcfsplit/main.nf +++ b/modules/local/vcfsplit/main.nf @@ -31,8 +31,8 @@ process VCFSPLIT { bcftools concat -a -Oz -o somatic.vcf.gz indels_pass.vcf.gz snv_pass.vcf.gz tabix -p vcf somatic.vcf.gz - bcftools view -i 'FILTER=="NonSomatic" || FILTER=="Verdict_Germline"' $indel_vcf | bgzip -c > indels_filtered.vcf.gz - bcftools view -i 'FILTER=="NonSomatic" || FILTER=="Verdict_Germline"' $snv_vcf | bgzip -c > snv_filtered.vcf.gz + bcftools view -i 'FILTER~"NonSomatic" || INFO/Verdict_Germline=1' $indel_vcf | bgzip -c > indels_filtered.vcf.gz + bcftools view -i 'FILTER~"NonSomatic" || INFO/Verdict_Germline=1' $snv_vcf | bgzip -c > snv_filtered.vcf.gz tabix -p vcf indels_filtered.vcf.gz tabix -p vcf snv_filtered.vcf.gz bcftools concat -a -Oz -o germline_tmp.vcf.gz indels_filtered.vcf.gz snv_filtered.vcf.gz From eff925715b9cceed339211413a2d6a20ef7de7c9 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 16:18:36 +0100 Subject: [PATCH 26/36] fix value channel --- subworkflows/local/prepare_reference_files.nf | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/subworkflows/local/prepare_reference_files.nf b/subworkflows/local/prepare_reference_files.nf index 324815e2..42e2e959 100644 --- a/subworkflows/local/prepare_reference_files.nf +++ b/subworkflows/local/prepare_reference_files.nf @@ -36,7 +36,7 @@ workflow PREPARE_REFERENCE_FILES { ch_prepared_fasta = UNZIP_FASTA.out.file ch_versions = ch_versions.mix(UNZIP_FASTA.out.versions) } else { - ch_prepared_fasta = [ [:], fasta ] + ch_prepared_fasta = channel.value([ [:], fasta ]) } // ch_prepared_fasta: [[:], fasta_path] -- empty meta; uncompressed if input was .gz From 00be26e7cc82c3d8f94a83f9808b71499ca661b7 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 24 Mar 2026 16:27:19 +0100 Subject: [PATCH 27/36] fixed severus channel --- workflows/lrsomatic.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index 3e600273..0a9f10de 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -703,7 +703,7 @@ workflow LRSOMATIC { // if (!params.skip_ascat) { - severus_reformat + severus_input .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf -> return [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] } @@ -731,7 +731,7 @@ workflow LRSOMATIC { if (!params.skip_wakhan) { // Prepare input channel for WAKHAN - severus_reformat + severus_input .join(SEVERUS.out.all_vcf) .set { wakhan_input } // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, phased_tbi, severus_all_vcf] From ee028d37bd2b515713ec47c8dae0841c1cc83f58 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Wed, 25 Mar 2026 10:15:50 +0100 Subject: [PATCH 28/36] fix snap and change modcall label --- conf/modules.config | 5 ++++- modules/local/longphase/modcall/main.nf | 2 +- tests/.nftignore | 2 ++ tests/default.nf.test | 13 +++++++++++++ tests/default.nf.test.snap | 26 +------------------------ 5 files changed, 21 insertions(+), 27 deletions(-) diff --git 
a/conf/modules.config b/conf/modules.config index cb37c839..cce129da 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -170,8 +170,11 @@ process { publishDir = [ path: { "${params.outdir}/${meta.id}/variants/deepvariant" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> + (filename.equals('versions.yml') || filename.endsWith('.g.vcf.gz') || filename.endsWith('.g.vcf.gz.tbi')) ? null : filename + } ] + } withName: '.*DEEPVARIANT_CALLVARIANTS' { diff --git a/modules/local/longphase/modcall/main.nf b/modules/local/longphase/modcall/main.nf index dd04482e..2390c34d 100644 --- a/modules/local/longphase/modcall/main.nf +++ b/modules/local/longphase/modcall/main.nf @@ -1,6 +1,6 @@ process LONGPHASE_MODCALL { tag "$meta.id" - label 'process_medium' + label 'process_very_high' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/tests/.nftignore b/tests/.nftignore index 7a9121a9..a1de7635 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -25,3 +25,5 @@ pipeline_info/*.{html,json,txt,yml} */qc/{tumor,normal}/nanoplot_ubam_*/*.txt */qc/{tumor,normal}/nanoplot_aln/*.txt */qc/{tumor,normal}/mosdepth/*.txt +*/variants/deepsomatic/*.{vcf.gz,vcf.gz.tbi} +*/variants/deepvariant/*.{vcf.gz,vcf.gz.tbi} diff --git a/tests/default.nf.test b/tests/default.nf.test index 160dc01e..a7a4281c 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -42,6 +42,19 @@ nextflow_pipeline { assert file("$launchDir/output/sample3/variants/clairsto/snv.vcf.gz").exists() assert file("$launchDir/output/sample3/variants/clairsto/somatic.vcf.gz").exists() assert file("$launchDir/output/sample3/variants/clairsto/germline.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/deepvariant/sample1.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/deepvariant/sample1.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample2/variants/deepvariant/sample2.vcf.gz").exists() + assert file("$launchDir/output/sample2/variants/deepvariant/sample2.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample3/variants/deepvariant/sample3.vcf.gz").exists() + assert file("$launchDir/output/sample3/variants/deepvariant/sample3.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample1/variants/deepsomatic/sample1.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/deepsomatic/sample1.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample2/variants/deepsomatic/sample2.vcf.gz").exists() + assert file("$launchDir/output/sample2/variants/deepsomatic/sample2.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample3/variants/deepsomatic/sample3.vcf.gz").exists() + assert file("$launchDir/output/sample3/variants/deepsomatic/sample3.vcf.gz.tbi").exists() + }, { assert snapshot( // pipeline versions.yml file for multiqc from which Nextflow version is 
removed because we test pipelines on multiple Nextflow versions diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 6b04accb..75ce6abf 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -284,8 +284,6 @@ "sample1/variants/deepsomatic/sample1.vcf.gz", "sample1/variants/deepsomatic/sample1.vcf.gz.tbi", "sample1/variants/deepvariant", - "sample1/variants/deepvariant/sample1.g.vcf.gz", - "sample1/variants/deepvariant/sample1.g.vcf.gz.tbi", "sample1/variants/deepvariant/sample1.vcf.gz", "sample1/variants/deepvariant/sample1.vcf.gz.tbi", "sample1/variants/phased", @@ -409,8 +407,6 @@ "sample2/variants/deepsomatic/sample2.vcf.gz", "sample2/variants/deepsomatic/sample2.vcf.gz.tbi", "sample2/variants/deepvariant", - "sample2/variants/deepvariant/sample2.g.vcf.gz", - "sample2/variants/deepvariant/sample2.g.vcf.gz.tbi", "sample2/variants/deepvariant/sample2.vcf.gz", "sample2/variants/deepvariant/sample2.vcf.gz.tbi", "sample2/variants/phased", @@ -502,8 +498,6 @@ "sample3/variants/deepsomatic/sample3.vcf.gz", "sample3/variants/deepsomatic/sample3.vcf.gz.tbi", "sample3/variants/deepvariant", - "sample3/variants/deepvariant/sample3.g.vcf.gz", - "sample3/variants/deepvariant/sample3.g.vcf.gz.tbi", "sample3/variants/deepvariant/sample3.vcf.gz", "sample3/variants/deepvariant/sample3.vcf.gz.tbi", "sample3/variants/phased", @@ -556,12 +550,6 @@ "sample1_whatshap_stats.gtf:md5,32e249c78790982098e4b0a606171d69", "sample1_whatshap_stats.log:md5,407b5fcdfbeec1830b2ed6f65f1c2c18", "sample1_whatshap_stats.tsv:md5,41290e994b5e6dab5ed696925cbb0716", - "sample1.vcf.gz:md5,92c0fa9016c3d8b192eb382fd6a81199", - "sample1.vcf.gz.tbi:md5,04e82577dc57f80b9db25897389364b2", - "sample1.g.vcf.gz:md5,5ed06f35ccecf7aadbec54873dc07e64", - "sample1.g.vcf.gz.tbi:md5,ca628ef368d34a7a6c77098a4c4bdf36", - "sample1.vcf.gz:md5,ad971a535d2b8014fabdabe72995a5db", - "sample1.vcf.gz.tbi:md5,65de5caace8d4312afa707c3bfd6fa45", 
"breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", "breakpoints_double.csv:md5,d63f058075bfe791248954ca2ee6c4fb", @@ -581,12 +569,6 @@ "sample2_whatshap_stats.gtf:md5,af33281699a1d0da83fbe7eaff198d03", "sample2_whatshap_stats.log:md5,8f5f400786f32871c16e523d9e236fc4", "sample2_whatshap_stats.tsv:md5,e8b67840491b7d092ac3d5d91db0ff46", - "sample2.vcf.gz:md5,f9d6266ee49c2544dc28eef0cdd9dcae", - "sample2.vcf.gz.tbi:md5,433ecd219947787d00dd0fada6307661", - "sample2.g.vcf.gz:md5,7998d15ce582ec94e86568e3fae654f9", - "sample2.g.vcf.gz.tbi:md5,9498e3266900eee2448ff05b0bce87c0", - "sample2.vcf.gz:md5,33330d8444a1774864883b33e1e2235c", - "sample2.vcf.gz.tbi:md5,384b8b5ea7c1559b464031f8e0863532", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", "breakpoints_double.csv:md5,a68d7fc9c7a7cb2f31e73189c5412f7b", @@ -601,12 +583,6 @@ "sample3_whatshap_stats.gtf:md5,415b20e0cc30409d24501d64b185dc49", "sample3_whatshap_stats.log:md5,99a842c8f8f3259ec66b68e8fe0345e6", "sample3_whatshap_stats.tsv:md5,a65d179e31756ae4127f0bf74da7e701", - "sample3.vcf.gz:md5,ee4f7a5bbd471c1370accf888b8262c0", - "sample3.vcf.gz.tbi:md5,944349ec46ecc95bab7db2f6848d0c36", - "sample3.g.vcf.gz:md5,64adc407ef2434254e76763b72c67b08", - "sample3.g.vcf.gz.tbi:md5,ac0397f7a840cf618d56f097defba878", - "sample3.vcf.gz:md5,9c52a803d796157a08b565c653d4685c", - "sample3.vcf.gz.tbi:md5,f0b3599e4281a6f3bbb8bc408740e58e", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", "breakpoints_double.csv:md5,46e03bf2d67aa736b599f00fe7f01e06", @@ -619,6 +595,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-24T14:54:34.678333789" + "timestamp": "2026-03-25T10:14:49.980270841" } } \ No newline at end of file From 48a71f692f43a6793442752dc085df05a0982311 Mon Sep 17 
00:00:00 2001 From: robert-a-forsyth Date: Thu, 26 Mar 2026 14:45:03 +0100 Subject: [PATCH 29/36] fix channel structure, update longphase --- modules.json | 4 +- .../local/longphase/modcall/environment.yml | 9 + modules/local/longphase/modcall/main.nf | 4 +- .../longphase/haplotag/environment.yml | 4 +- modules/nf-core/longphase/haplotag/main.nf | 16 +- modules/nf-core/longphase/haplotag/meta.yml | 36 ++-- .../longphase/haplotag/tests/main.nf.test | 10 +- .../haplotag/tests/main.nf.test.snap | 121 ++++++------ .../nf-core/longphase/phase/environment.yml | 4 +- modules/nf-core/longphase/phase/main.nf | 26 +-- modules/nf-core/longphase/phase/meta.yml | 48 +++-- .../longphase/phase/tests/main.nf.test | 10 +- .../longphase/phase/tests/main.nf.test.snap | 180 +++++------------- workflows/lrsomatic.nf | 7 +- 14 files changed, 204 insertions(+), 275 deletions(-) create mode 100644 modules/local/longphase/modcall/environment.yml diff --git a/modules.json b/modules.json index b774fa63..2ee14816 100644 --- a/modules.json +++ b/modules.json @@ -81,12 +81,12 @@ }, "longphase/haplotag": { "branch": "master", - "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", + "git_sha": "b8d30a43f33aee3148b0e9e9f00587984a4ac195", "installed_by": ["modules"] }, "longphase/phase": { "branch": "master", - "git_sha": "47983538e45e539f783ed8ab0d1c96d39df2af8f", + "git_sha": "b8d30a43f33aee3148b0e9e9f00587984a4ac195", "installed_by": ["modules"], "patch": "modules/nf-core/longphase/phase/longphase-phase.diff" }, diff --git a/modules/local/longphase/modcall/environment.yml b/modules/local/longphase/modcall/environment.yml new file mode 100644 index 00000000..f436bdae --- /dev/null +++ b/modules/local/longphase/modcall/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.23.1 + - bioconda::longphase=2.0.1 diff 
--git a/modules/local/longphase/modcall/main.nf b/modules/local/longphase/modcall/main.nf index 2390c34d..a49ffa6d 100644 --- a/modules/local/longphase/modcall/main.nf +++ b/modules/local/longphase/modcall/main.nf @@ -4,8 +4,8 @@ process LONGPHASE_MODCALL { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b0/b0184a9a36d8612fbae38bbaad7b52f03b815ad17673740e107cf1f267a1f15d/data': - 'community.wave.seqera.io/library/htslib_longphase:3071e61356fc25a4' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/83fce1d397cf71705cc096fc0e0e52f7013bdd471ef68ee53ae765688e5c439c/data': + 'community.wave.seqera.io/library/longphase_samtools:8c61296cae7a5fc0' }" input: tuple val(meta), path(bam), path(bai) diff --git a/modules/nf-core/longphase/haplotag/environment.yml b/modules/nf-core/longphase/haplotag/environment.yml index 3a882a5d..f436bdae 100644 --- a/modules/nf-core/longphase/haplotag/environment.yml +++ b/modules/nf-core/longphase/haplotag/environment.yml @@ -5,5 +5,5 @@ channels: - bioconda dependencies: - - bioconda::htslib=1.22.1 - - bioconda::longphase=2.0 + - bioconda::htslib=1.23.1 + - bioconda::longphase=2.0.1 diff --git a/modules/nf-core/longphase/haplotag/main.nf b/modules/nf-core/longphase/haplotag/main.nf index a64eb7c8..7eb84669 100644 --- a/modules/nf-core/longphase/haplotag/main.nf +++ b/modules/nf-core/longphase/haplotag/main.nf @@ -4,8 +4,8 @@ process LONGPHASE_HAPLOTAG { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b0/b0184a9a36d8612fbae38bbaad7b52f03b815ad17673740e107cf1f267a1f15d/data': - 'community.wave.seqera.io/library/htslib_longphase:3071e61356fc25a4' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/83fce1d397cf71705cc096fc0e0e52f7013bdd471ef68ee53ae765688e5c439c/data': + 'community.wave.seqera.io/library/longphase_samtools:8c61296cae7a5fc0' }" input: tuple val(meta), path(bam), path(bai), path(snps), path(svs), path(mods) @@ -16,7 +16,7 @@ process LONGPHASE_HAPLOTAG { output: tuple val(meta), path("*.{bam,cram}"), emit: bam tuple val(meta), path("*.log") , emit: log , optional: true - path "versions.yml" , emit: versions + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions when: task.ext.when == null || task.ext.when @@ -42,11 +42,6 @@ process LONGPHASE_HAPLOTAG { if [ -f "${prefix}.out" ]; then mv ${prefix}.out ${prefix}.log fi - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') - END_VERSIONS """ stub: @@ -57,10 +52,5 @@ process LONGPHASE_HAPLOTAG { """ touch ${prefix}.${suffix} ${log} - - cat <<-END_VERSIONS > versions.yml - "${task.process}": - longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') - END_VERSIONS """ } diff --git a/modules/nf-core/longphase/haplotag/meta.yml b/modules/nf-core/longphase/haplotag/meta.yml index 63368b31..5c3ad844 100644 --- a/modules/nf-core/longphase/haplotag/meta.yml +++ b/modules/nf-core/longphase/haplotag/meta.yml @@ -1,7 +1,7 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "longphase_haplotag" -description: LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, - small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms. 
+description: LongPhase is an ultra-fast program for simultaneously co-phasing + SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio + platforms. keywords: - haplotag - long-read @@ -15,9 +15,9 @@ tools: documentation: "https://github.com/twolinin/longphase" tool_dev_url: "https://github.com/twolinin/longphase" doi: "10.1093/bioinformatics/btac058" - licence: ["GPL v3"] + licence: + - "GPL v3" identifier: "" - input: - - meta: type: map @@ -92,13 +92,27 @@ output: description: Log file pattern: "*.log" ontologies: [] + versions_longphase: + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool authors: - "@fellen31" maintainers: diff --git a/modules/nf-core/longphase/haplotag/tests/main.nf.test b/modules/nf-core/longphase/haplotag/tests/main.nf.test index fb5d9311..687f61e0 100644 --- a/modules/nf-core/longphase/haplotag/tests/main.nf.test +++ b/modules/nf-core/longphase/haplotag/tests/main.nf.test @@ -38,7 +38,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions")}, bam(process.out.bam.get(0).get(1), stringency: 'silent').getHeader(), bam(process.out.bam.get(0).get(1), stringency: 'silent').getReadsMD5(), ).match() } @@ -79,7 +79,7 @@ 
nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions")}, process.out.log, bam(process.out.bam.get(0).get(1), 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/genome.fasta', stringency: 'silent').getHeader()[2..5], bam(process.out.bam.get(0).get(1), 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/genome.fasta', stringency: 'silent').getReadsMD5(), @@ -118,7 +118,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions")}, bam(process.out.bam.get(0).get(1), stringency: 'silent').getHeader(), bam(process.out.bam.get(0).get(1), stringency: 'silent').getReadsMD5(), ).match() } @@ -157,7 +157,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -194,7 +194,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } diff --git a/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap b/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap index 412fb8d0..23287721 100644 --- a/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap +++ b/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap @@ -2,25 +2,6 @@ "[ bam, bai, snps, [], [] ], fasta, fai - log & cram -stub": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" - ], "bam": [ [ { @@ -37,55 +18,51 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - 
"versions": [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-22T11:47:07.246878607" + "timestamp": "2026-03-20T10:13:11.968590854" }, "[ bam, bai, snps, [], [] ], fasta, fai": { "content": [ - [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" - ], + { + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] + ] + }, [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:chr22\tLN:40001", "@RG\tID:test\tSM:test", "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", - "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0.1\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " ], "721264eb2824a3146b331f2532d10180" ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-22T11:46:50.605854269" + "timestamp": "2026-03-20T10:12:40.584213389" }, "[ bam, bai, snps, [], [] ], fasta, fai -stub": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" - ], "bam": [ [ { @@ -97,43 +74,59 @@ "log": [ ], - "versions": [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] ] } 
], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-22T11:47:03.645623933" + "timestamp": "2026-03-20T10:13:04.628910585" }, "[ bam, bai, snps, svs, [] ], fasta, fai": { "content": [ - [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" - ], + { + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] + ] + }, [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:chr22\tLN:40001", "@RG\tID:test\tSM:test", "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", - "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam --sv-file NA24385_sv.vcf.gz " + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0.1\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam --sv-file NA24385_sv.vcf.gz " ], "721264eb2824a3146b331f2532d10180" ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-22T11:47:00.022853298" + "timestamp": "2026-03-20T10:12:57.997252428" }, "[ bam, bai, snps, [], [] ], fasta, fai - log & cram": { "content": [ - [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" - ], + { + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] + ] + }, [ [ { @@ -146,14 +139,14 @@ "@RG\tID:test\tSM:test", "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o 
test.bam_other.fastq.gz.bam --write-index", - "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0\tCL:longphase haplotag --log --cram --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0.1\tCL:longphase haplotag --log --cram --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " ], "721264eb2824a3146b331f2532d10180" ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-22T11:46:55.93374635" + "timestamp": "2026-03-20T10:12:49.73950987" } } \ No newline at end of file diff --git a/modules/nf-core/longphase/phase/environment.yml b/modules/nf-core/longphase/phase/environment.yml index 3a882a5d..f436bdae 100644 --- a/modules/nf-core/longphase/phase/environment.yml +++ b/modules/nf-core/longphase/phase/environment.yml @@ -5,5 +5,5 @@ channels: - bioconda dependencies: - - bioconda::htslib=1.22.1 - - bioconda::longphase=2.0 + - bioconda::htslib=1.23.1 + - bioconda::longphase=2.0.1 diff --git a/modules/nf-core/longphase/phase/main.nf b/modules/nf-core/longphase/phase/main.nf index 5a20381f..ccd1b715 100644 --- a/modules/nf-core/longphase/phase/main.nf +++ b/modules/nf-core/longphase/phase/main.nf @@ -4,8 +4,8 @@ process LONGPHASE_PHASE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b0/b0184a9a36d8612fbae38bbaad7b52f03b815ad17673740e107cf1f267a1f15d/data': - 'community.wave.seqera.io/library/htslib_longphase:3071e61356fc25a4' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/83fce1d397cf71705cc096fc0e0e52f7013bdd471ef68ee53ae765688e5c439c/data': + 'community.wave.seqera.io/library/longphase_samtools:8c61296cae7a5fc0' }" input: tuple val(meta), path(bam), path(bai), path(snvs), path(svs), path(mods) @@ -14,13 +14,10 @@ process LONGPHASE_PHASE { output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf - tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: snv_vcf_index - tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true - tuple val(meta), path("${prefix}_SV.vcf.gz.tbi") , emit: sv_vcf_index , optional: true - tuple val(meta), path("${prefix}_mod.vcf.gz") , emit: mod_vcf, optional: true - tuple val(meta), path("${prefix}_mod.vcf.gz.tbi"), emit: mod_vcf_index, optional: true - path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf + tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true + tuple val(meta), path("${prefix}_mod.vcf.gz"), emit: mod_vcf, optional: true + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions when: task.ext.when == null || task.ext.when @@ -49,20 +46,9 @@ process LONGPHASE_PHASE { $args2 \\ ${prefix}*.vcf - tabix -p vcf ${prefix}.vcf.gz - - if [ -f ${prefix}_SV.vcf.gz ]; then - tabix -p vcf ${prefix}_SV.vcf.gz - fi - - if [ -f ${prefix}_mod.vcf.gz ]; then - tabix -p vcf ${prefix}_mod.vcf.gz - fi - cat <<-END_VERSIONS > versions.yml "${task.process}": longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') - tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ diff --git 
a/modules/nf-core/longphase/phase/meta.yml b/modules/nf-core/longphase/phase/meta.yml index 94efc684..266b878b 100644 --- a/modules/nf-core/longphase/phase/meta.yml +++ b/modules/nf-core/longphase/phase/meta.yml @@ -1,7 +1,7 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "longphase_phase" -description: LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, - small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms. +description: LongPhase is an ultra-fast program for simultaneously co-phasing + SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio + platforms. keywords: - phase - long-read @@ -15,9 +15,9 @@ tools: documentation: "https://github.com/twolinin/longphase" tool_dev_url: "https://github.com/twolinin/longphase" doi: "10.1093/bioinformatics/btac058" - licence: ["GPL v3"] + licence: + - "GPL v3" identifier: "" - input: - - meta: type: map @@ -76,43 +76,57 @@ output: description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - "${prefix}.vcf.gz": + - ${prefix}.vcf.gz: type: file description: Compressed VCF file with phased SNVs and indels pattern: "*.vcf.gz" ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format + - edam: http://edamontology.org/format_3989 sv_vcf: - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - "${prefix}_SV.vcf.gz": + - ${prefix}_SV.vcf.gz: type: file description: Compressed VCF file with phased SVs pattern: "*_SV.vcf.gz" ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format + - edam: http://edamontology.org/format_3989 mod_vcf: - - meta: type: map description: | Groovy Map containing sample information e.g. 
`[ id:'sample1', single_end:false ]` - - "${prefix}_mod.vcf.gz": + - ${prefix}_mod.vcf.gz: type: file description: Compressed VCF file with phased modifications pattern: "*.vcf.gz" ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format + - edam: http://edamontology.org/format_3989 + versions_longphase: + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool authors: - "@fellen31" maintainers: diff --git a/modules/nf-core/longphase/phase/tests/main.nf.test b/modules/nf-core/longphase/phase/tests/main.nf.test index b45bbf01..30c666ba 100644 --- a/modules/nf-core/longphase/phase/tests/main.nf.test +++ b/modules/nf-core/longphase/phase/tests/main.nf.test @@ -38,7 +38,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -72,7 +72,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -112,7 +112,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -148,7 +148,7 @@ nextflow_process { then { assertAll( 
{ assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -183,7 +183,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } diff --git a/modules/nf-core/longphase/phase/tests/main.nf.test.snap b/modules/nf-core/longphase/phase/tests/main.nf.test.snap index b0cf7144..c6a155f9 100644 --- a/modules/nf-core/longphase/phase/tests/main.nf.test.snap +++ b/modules/nf-core/longphase/phase/tests/main.nf.test.snap @@ -2,28 +2,6 @@ "[ bam, bai, snps, svs, [] ], fasta, fai - stub": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test_SV.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" - ], "mod_vcf": [ ], @@ -43,37 +21,24 @@ "test_SV.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.5" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-11-06T16:06:24.025191062" + "timestamp": "2026-03-20T10:13:50.746589174" }, "[ bam, bai, snps, [], [] ], fasta, fai": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,77d7ca7d16c841d3f552681abef984dc" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" - ], "mod_vcf": [ ], @@ -82,48 +47,30 @@ { "id": "test" }, - "test.vcf.gz:md5,77d7ca7d16c841d3f552681abef984dc" + "test.vcf.gz:md5,73d5f51aea92e09b3d427837066f114c" ] ], "sv_vcf": [ ], - "versions": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + 
"longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.5" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-11-06T16:05:57.029934447" + "timestamp": "2026-03-20T10:13:19.273322013" }, "[ bam, bai, snps, svs, [] ], fasta, fai": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,f26bc442f6a1645bcfaabf989ab9483c" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test_SV.vcf.gz:md5,e1b83c15a21bab57f2b228cc7c7d8be8" - ] - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" - ], "mod_vcf": [ ], @@ -132,7 +79,7 @@ { "id": "test" }, - "test.vcf.gz:md5,f26bc442f6a1645bcfaabf989ab9483c" + "test.vcf.gz:md5,af297491417a5727de21f893b553db37" ] ], "sv_vcf": [ @@ -140,45 +87,27 @@ { "id": "test" }, - "test_SV.vcf.gz:md5,e1b83c15a21bab57f2b228cc7c7d8be8" + "test_SV.vcf.gz:md5,4636e0ac86a86565e5d04b5d1b6a00e7" ] ], - "versions": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.5" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-11-06T16:06:03.319855838" + "timestamp": "2026-03-20T10:13:27.450110496" }, "[ bam x2, bai x2, snps, svs, [] ], fasta, fai": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,5333ba9fa14233d3fdbd8b9e1786b998" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test_SV.vcf.gz:md5,434fd35ae3de2a9187e43932686bfd19" - ] - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" - ], "mod_vcf": [ ], @@ -187,7 +116,7 @@ { "id": "test" }, - "test.vcf.gz:md5,5333ba9fa14233d3fdbd8b9e1786b998" + "test.vcf.gz:md5,f688da3f046717765e879c061510e037" ] ], "sv_vcf": [ @@ -195,40 +124,27 @@ { "id": "test" }, - "test_SV.vcf.gz:md5,434fd35ae3de2a9187e43932686bfd19" + "test_SV.vcf.gz:md5,5336fc5eb9d3421cef66fd18320a4cb8" ] ], - "versions": [ - 
"versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.5" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-11-06T16:06:10.867281359" + "timestamp": "2026-03-20T10:13:36.797768748" }, "[ bam, bai, snps, [], [] ], fasta, fai - stub": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" - ], "mod_vcf": [ ], @@ -243,15 +159,19 @@ "sv_vcf": [ ], - "versions": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.5" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-11-06T16:06:17.992733472" + "timestamp": "2026-03-20T10:13:44.86300696" } } \ No newline at end of file diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index 0a9f10de..eaf32b71 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -704,7 +704,7 @@ workflow LRSOMATIC { if (!params.skip_ascat) { severus_input - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf -> + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf, _tbi -> return [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] } .set { ascat_ch } @@ -733,8 +733,11 @@ workflow LRSOMATIC { // Prepare input channel for WAKHAN severus_input .join(SEVERUS.out.all_vcf) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, _phased_tbi, all_vcf -> + return [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, all_vcf] + } .set { wakhan_input } - // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, phased_tbi, severus_all_vcf] + // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, 
severus_all_vcf] WAKHAN ( wakhan_input, From fcf2547bacb0cf3c0f0d409680e0a70f540fc67b Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Thu, 26 Mar 2026 16:17:59 +0100 Subject: [PATCH 30/36] fix longphase output --- modules.json | 3 +- .../haplotag/longphase-haplotag.diff | 43 +++++++++++++++++++ modules/nf-core/longphase/haplotag/main.nf | 14 +++++- .../longphase/phase/longphase-phase.diff | 24 ++++++++--- modules/nf-core/longphase/phase/main.nf | 24 ++++++++--- tests/default.nf.test.snap | 22 +++++----- 6 files changed, 105 insertions(+), 25 deletions(-) create mode 100644 modules/nf-core/longphase/haplotag/longphase-haplotag.diff diff --git a/modules.json b/modules.json index 2ee14816..4b49d95a 100644 --- a/modules.json +++ b/modules.json @@ -82,7 +82,8 @@ "longphase/haplotag": { "branch": "master", "git_sha": "b8d30a43f33aee3148b0e9e9f00587984a4ac195", - "installed_by": ["modules"] + "installed_by": ["modules"], + "patch": "modules/nf-core/longphase/haplotag/longphase-haplotag.diff" }, "longphase/phase": { "branch": "master", diff --git a/modules/nf-core/longphase/haplotag/longphase-haplotag.diff b/modules/nf-core/longphase/haplotag/longphase-haplotag.diff new file mode 100644 index 00000000..cb47adc9 --- /dev/null +++ b/modules/nf-core/longphase/haplotag/longphase-haplotag.diff @@ -0,0 +1,43 @@ +Changes in component 'nf-core/longphase/haplotag' +'modules/nf-core/longphase/haplotag/meta.yml' is unchanged +Changes in 'longphase/haplotag/main.nf': +--- modules/nf-core/longphase/haplotag/main.nf ++++ modules/nf-core/longphase/haplotag/main.nf +@@ -16,7 +16,7 @@ + output: + tuple val(meta), path("*.{bam,cram}"), emit: bam + tuple val(meta), path("*.log") , emit: log , optional: true +- tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions ++ path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when +@@ -42,6 +42,11 @@ + if [ -f 
"${prefix}.out" ]; then + mv ${prefix}.out ${prefix}.log + fi ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') ++ END_VERSIONS + """ + + stub: +@@ -52,5 +57,10 @@ + """ + touch ${prefix}.${suffix} + ${log} ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') ++ END_VERSIONS + """ +-} ++} +'modules/nf-core/longphase/haplotag/environment.yml' is unchanged +'modules/nf-core/longphase/haplotag/tests/main.nf.test' is unchanged +'modules/nf-core/longphase/haplotag/tests/main.nf.test.snap' is unchanged +'modules/nf-core/longphase/haplotag/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/longphase/haplotag/main.nf b/modules/nf-core/longphase/haplotag/main.nf index 7eb84669..9b4d6c88 100644 --- a/modules/nf-core/longphase/haplotag/main.nf +++ b/modules/nf-core/longphase/haplotag/main.nf @@ -16,7 +16,7 @@ process LONGPHASE_HAPLOTAG { output: tuple val(meta), path("*.{bam,cram}"), emit: bam tuple val(meta), path("*.log") , emit: log , optional: true - tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -42,6 +42,11 @@ process LONGPHASE_HAPLOTAG { if [ -f "${prefix}.out" ]; then mv ${prefix}.out ${prefix}.log fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS """ stub: @@ -52,5 +57,10 @@ process LONGPHASE_HAPLOTAG { """ touch ${prefix}.${suffix} ${log} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS """ -} +} \ No newline at end of file diff --git 
a/modules/nf-core/longphase/phase/longphase-phase.diff b/modules/nf-core/longphase/phase/longphase-phase.diff index d8151f8c..a24c930e 100644 --- a/modules/nf-core/longphase/phase/longphase-phase.diff +++ b/modules/nf-core/longphase/phase/longphase-phase.diff @@ -10,7 +10,7 @@ Changes in 'longphase/phase/main.nf': - tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf - tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true - tuple val(meta), path("${prefix}_mod.vcf.gz"), emit: mod_vcf, optional: true -- path "versions.yml" , emit: versions +- tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions + tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: snv_vcf_index + tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true @@ -21,13 +21,20 @@ Changes in 'longphase/phase/main.nf': when: task.ext.when == null || task.ext.when -@@ -45,10 +48,13 @@ - --threads $task.cpus \\ +@@ -46,9 +49,20 @@ $args2 \\ ${prefix}*.vcf -+ -+ tabix -p vcf ${prefix}*.vcf.gz ++ tabix -p vcf ${prefix}.vcf.gz ++ ++ if [ -f ${prefix}_SV.vcf.gz ]; then ++ tabix -p vcf ${prefix}_SV.vcf.gz ++ fi ++ ++ if [ -f ${prefix}_mod.vcf.gz ]; then ++ tabix -p vcf ${prefix}_mod.vcf.gz ++ fi ++ cat <<-END_VERSIONS > versions.yml "${task.process}": longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') @@ -35,7 +42,12 @@ Changes in 'longphase/phase/main.nf': END_VERSIONS """ - +@@ -69,4 +83,4 @@ + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ +-} ++} 'modules/nf-core/longphase/phase/environment.yml' is unchanged 'modules/nf-core/longphase/phase/tests/main.nf.test' is unchanged 'modules/nf-core/longphase/phase/tests/main.nf.test.snap' is unchanged diff --git a/modules/nf-core/longphase/phase/main.nf b/modules/nf-core/longphase/phase/main.nf index 
ccd1b715..28ff6ef9 100644 --- a/modules/nf-core/longphase/phase/main.nf +++ b/modules/nf-core/longphase/phase/main.nf @@ -14,10 +14,13 @@ process LONGPHASE_PHASE { output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf - tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true - tuple val(meta), path("${prefix}_mod.vcf.gz"), emit: mod_vcf, optional: true - tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions + tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: snv_vcf_index + tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true + tuple val(meta), path("${prefix}_SV.vcf.gz.tbi") , emit: sv_vcf_index , optional: true + tuple val(meta), path("${prefix}_mod.vcf.gz") , emit: mod_vcf, optional: true + tuple val(meta), path("${prefix}_mod.vcf.gz.tbi"), emit: mod_vcf_index, optional: true + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -46,9 +49,20 @@ process LONGPHASE_PHASE { $args2 \\ ${prefix}*.vcf + tabix -p vcf ${prefix}.vcf.gz + + if [ -f ${prefix}_SV.vcf.gz ]; then + tabix -p vcf ${prefix}_SV.vcf.gz + fi + + if [ -f ${prefix}_mod.vcf.gz ]; then + tabix -p vcf ${prefix}_mod.vcf.gz + fi + cat <<-END_VERSIONS > versions.yml "${task.process}": longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + tabix: \$(echo \$(tabix -h 2>&1) | sed 's/^.*Version: //; s/ .*\$//') END_VERSIONS """ @@ -69,4 +83,4 @@ process LONGPHASE_PHASE { longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') END_VERSIONS """ -} +} \ No newline at end of file diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 75ce6abf..1eba0cf8 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -537,10 +537,10 @@ "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz_summary.html" ], [ - 
"sample1_normal.bam:md5,92a00e311e085a34d443cb64694ce839", - "sample1_normal.bam.bai:md5,c7dff8adc4c8d33a81fb8ea7dff4a98e", - "sample1_tumor.bam:md5,c6c79808f928393b23ae53976a7304c3", - "sample1_tumor.bam.bai:md5,317d711c0c26d2cfb933ad53b69da1d7", + "sample1_normal.bam:md5,186e4e8400cce1f02190fa91ad449271", + "sample1_normal.bam.bai:md5,cabfbe44aa1f0fb6cf5b4d54e6c4d811", + "sample1_tumor.bam:md5,2887783d87d9e4dedbbca367d5e4efdb", + "sample1_tumor.bam.bai:md5,9eb6cf08de5a60644fa54c8810e3dc58", "sample1.flagstat:md5,1c41ea9923945501eb7e41f83a90502d", "sample1.idxstats:md5,902e503387799123ea59255e3fca172c", "sample1.stats:md5,70fabbdc07dec0479b3fc7dcec344054", @@ -556,10 +556,10 @@ "read_qual.txt:md5,78247dfa2ea336eac0e128eba5e9eef4", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "sample2_normal.bam:md5,ed6aab4d2ef70537390a0ce85276ad2c", - "sample2_normal.bam.bai:md5,fb92c167c63e7f62d0c4c3fea8bd1b60", - "sample2_tumor.bam:md5,10c29ced5ed253731ca50097d9c848e3", - "sample2_tumor.bam.bai:md5,e0aa6d8d594070e753145503520fffab", + "sample2_normal.bam:md5,aff37a8ad733e11fd20978392810e8d8", + "sample2_normal.bam.bai:md5,64744e26f51927c77fd48c282f6ec07d", + "sample2_tumor.bam:md5,9e14b05c07bde2a4653072cba2161a67", + "sample2_tumor.bam.bai:md5,04f64a62a741c7b725877e5d37ceff2f", "sample2.flagstat:md5,714d0cc0c213e2640e54a16f3d0e6e7e", "sample2.idxstats:md5,72eb83bb11748dc863fef1a0a5497e4b", "sample2.stats:md5,87cb6e9adf8a133244e8b331be43bb14", @@ -575,8 +575,8 @@ "read_qual.txt:md5,8b92ff7dc4536188be159b95525511cd", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "sample3_tumor.bam:md5,5020a416186da412d9e89f7efac64178", - "sample3_tumor.bam.bai:md5,fc26fd7d9b388e6551898aacf5ed0c7b", + "sample3_tumor.bam:md5,13432ff8635f1d142f5f260676930754", + "sample3_tumor.bam.bai:md5,b5ace84a3a8619a93227af01e211b1b9", 
"sample3.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample3.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", "sample3.stats:md5,d7a8552a8a41a217954a0c825d468a60", @@ -595,6 +595,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-25T10:14:49.980270841" + "timestamp": "2026-03-26T16:13:56.877873308" } } \ No newline at end of file From 6f5a1b188986fd014816a58bd4714ac5674eb366 Mon Sep 17 00:00:00 2001 From: ljwharbers Date: Thu, 26 Mar 2026 20:52:38 +0100 Subject: [PATCH 31/36] small changes --- conf/modules.config | 1 + modules/local/clairs/main.nf | 2 +- modules/local/clairsto/main.nf | 2 +- modules/local/deepsomatic/callvariants/main.nf | 2 +- modules/local/deepsomatic/makeexamples/main.nf | 6 +++--- nextflow.config | 1 + nextflow_schema.json | 2 +- workflows/lrsomatic.nf | 1 + 8 files changed, 10 insertions(+), 7 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index cce129da..5a3cd9a9 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -21,6 +21,7 @@ process { // // QC Processes + // withName: '.*:MULTIQC' { ext.args = { params.multiqc_title ? "--title \"$params.multiqc_title\"" : '' } diff --git a/modules/local/clairs/main.nf b/modules/local/clairs/main.nf index 4342e401..a7a310b5 100644 --- a/modules/local/clairs/main.nf +++ b/modules/local/clairs/main.nf @@ -20,7 +20,7 @@ process CLAIRS { task.ext.when == null || task.ext.when script: - prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" def args = task.ext.args ?: '' """ diff --git a/modules/local/clairsto/main.nf b/modules/local/clairsto/main.nf index 04e73819..7147061e 100644 --- a/modules/local/clairsto/main.nf +++ b/modules/local/clairsto/main.nf @@ -24,7 +24,7 @@ process CLAIRSTO { script: def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" def conda_prefix = workflow.containerEngine == 'singularity' ? 
'--conda_prefix /opt/micromamba/envs/clairs-to' : '' def pon_string = pon_vcfs.join(',') def flags_string = pon_flags.join(',') diff --git a/modules/local/deepsomatic/callvariants/main.nf b/modules/local/deepsomatic/callvariants/main.nf index 4906b954..10cdb40b 100644 --- a/modules/local/deepsomatic/callvariants/main.nf +++ b/modules/local/deepsomatic/callvariants/main.nf @@ -10,7 +10,7 @@ process DEEPSOMATIC_CALLVARIANTS { tuple val(meta), path(make_examples_tfrecords) output: - tuple val(meta), path("${prefix}.call-*-of-*.tfrecord.gz"), emit: call_variants_tfrecords + tuple val(meta), path("${prefix}.call-*-of-*.tfrecord.gz") , emit: call_variants_tfrecords tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic when: diff --git a/modules/local/deepsomatic/makeexamples/main.nf b/modules/local/deepsomatic/makeexamples/main.nf index 206e497c..f33300ab 100644 --- a/modules/local/deepsomatic/makeexamples/main.nf +++ b/modules/local/deepsomatic/makeexamples/main.nf @@ -12,9 +12,9 @@ process DEEPSOMATIC_MAKEEXAMPLES { tuple val(meta4), path(gzi) output: - tuple val(meta), path("${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}"), emit: examples - tuple val(meta), path("${prefix}.gvcf.tfrecord-*-of-*.gz"), emit: gvcf - tuple val(meta), path("${prefix}_call_variant_outputs.tfrecord-*-of-*.gz", arity: "0..*"), emit: small_model_calls + tuple val(meta), path("${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}") , emit: examples + tuple val(meta), path("${prefix}.gvcf.tfrecord-*-of-*.gz") , emit: gvcf + tuple val(meta), path("${prefix}_call_variant_outputs.tfrecord-*-of-*.gz", arity: "0..*") , emit: small_model_calls tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic when: diff --git a/nextflow.config b/nextflow.config index f8ad334a..3b2b40d4 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,7 @@ params { // Input options input = null 
+ // Small variant calling options germline_var_keep = 'consensus' somatic_var_keep = 'all' trust_caller = 'deepvariant' diff --git a/nextflow_schema.json b/nextflow_schema.json index 204a7742..89f599c3 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -55,7 +55,7 @@ }, "germline_var_keep": { "type": "string", - "description": "specifies which germline variants are used for phasing and annotation. Must be [consensus, all,deepvariant,clair]", + "description": "specifies which germline variants are used for phasing and annotation. Must be [consensus, all, deepvariant, clair]", "default": "consensus", "enum": ["consensus", "all", "deepvariant", "clair"] }, diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index eaf32b71..40f3c9bc 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -159,6 +159,7 @@ workflow LRSOMATIC { [ meta, bam.flatten()] } .set{ch_samplesheet} + // [meta_full, [bam...]] -- meta now includes: id, paired_data, type, platform, sex, fiber, clair3_model, clairS_model, clairSTO_model, kinetics // // SUBWORKFLOW: PREPARE_REFERENCE_FILES From eb86f4a540d163fda90e9c1882174f221422b4ad Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Fri, 27 Mar 2026 14:28:28 +0100 Subject: [PATCH 32/36] revert longphase output versions --- .../nf-core/longphase/haplotag/longphase-haplotag.diff | 9 --------- modules/nf-core/longphase/haplotag/main.nf | 2 +- modules/nf-core/longphase/phase/longphase-phase.diff | 9 +++++---- modules/nf-core/longphase/phase/main.nf | 4 ++-- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/modules/nf-core/longphase/haplotag/longphase-haplotag.diff b/modules/nf-core/longphase/haplotag/longphase-haplotag.diff index cb47adc9..0d93067d 100644 --- a/modules/nf-core/longphase/haplotag/longphase-haplotag.diff +++ b/modules/nf-core/longphase/haplotag/longphase-haplotag.diff @@ -3,15 +3,6 @@ Changes in component 'nf-core/longphase/haplotag' Changes in 'longphase/haplotag/main.nf': --- 
modules/nf-core/longphase/haplotag/main.nf +++ modules/nf-core/longphase/haplotag/main.nf -@@ -16,7 +16,7 @@ - output: - tuple val(meta), path("*.{bam,cram}"), emit: bam - tuple val(meta), path("*.log") , emit: log , optional: true -- tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions -+ path "versions.yml" , emit: versions - - when: - task.ext.when == null || task.ext.when @@ -42,6 +42,11 @@ if [ -f "${prefix}.out" ]; then mv ${prefix}.out ${prefix}.log diff --git a/modules/nf-core/longphase/haplotag/main.nf b/modules/nf-core/longphase/haplotag/main.nf index 9b4d6c88..d529554a 100644 --- a/modules/nf-core/longphase/haplotag/main.nf +++ b/modules/nf-core/longphase/haplotag/main.nf @@ -16,7 +16,7 @@ process LONGPHASE_HAPLOTAG { output: tuple val(meta), path("*.{bam,cram}"), emit: bam tuple val(meta), path("*.log") , emit: log , optional: true - path "versions.yml" , emit: versions + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions when: task.ext.when == null || task.ext.when diff --git a/modules/nf-core/longphase/phase/longphase-phase.diff b/modules/nf-core/longphase/phase/longphase-phase.diff index a24c930e..b994f78f 100644 --- a/modules/nf-core/longphase/phase/longphase-phase.diff +++ b/modules/nf-core/longphase/phase/longphase-phase.diff @@ -3,24 +3,25 @@ Changes in component 'nf-core/longphase/phase' Changes in 'longphase/phase/main.nf': --- modules/nf-core/longphase/phase/main.nf +++ modules/nf-core/longphase/phase/main.nf -@@ -14,10 +14,13 @@ +@@ -14,11 +14,14 @@ output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf - tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true - tuple val(meta), path("${prefix}_mod.vcf.gz"), emit: mod_vcf, optional: true -- tuple val("${task.process}"), val("longphase"), eval("longphase 
--version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions + tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: snv_vcf_index + tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true + tuple val(meta), path("${prefix}_SV.vcf.gz.tbi") , emit: sv_vcf_index , optional: true + tuple val(meta), path("${prefix}_mod.vcf.gz") , emit: mod_vcf, optional: true + tuple val(meta), path("${prefix}_mod.vcf.gz.tbi"), emit: mod_vcf_index, optional: true -+ path "versions.yml" , emit: versions - + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions +- ++ when: task.ext.when == null || task.ext.when + @@ -46,9 +49,20 @@ $args2 \\ ${prefix}*.vcf diff --git a/modules/nf-core/longphase/phase/main.nf b/modules/nf-core/longphase/phase/main.nf index 28ff6ef9..3bc0b715 100644 --- a/modules/nf-core/longphase/phase/main.nf +++ b/modules/nf-core/longphase/phase/main.nf @@ -20,8 +20,8 @@ process LONGPHASE_PHASE { tuple val(meta), path("${prefix}_SV.vcf.gz.tbi") , emit: sv_vcf_index , optional: true tuple val(meta), path("${prefix}_mod.vcf.gz") , emit: mod_vcf, optional: true tuple val(meta), path("${prefix}_mod.vcf.gz.tbi"), emit: mod_vcf_index, optional: true - path "versions.yml" , emit: versions - + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions + when: task.ext.when == null || task.ext.when From f094a0be982d882629db8727695997e15fac8b50 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Fri, 27 Mar 2026 16:39:26 +0100 Subject: [PATCH 33/36] adressing comments --- conf/igenomes.config | 1 + conf/modules.config | 21 +++++++ conf/test.config | 2 +- .../haplotag/longphase-haplotag.diff | 34 ------------ nextflow.config | 9 ++- nextflow_schema.json | 50 +++++++++++++---- 
.../local/paired/paired_smallvar_germline.nf | 13 +++-- .../local/paired/paired_smallvar_somatic.nf | 13 +++-- subworkflows/local/small_variant_consensus.nf | 55 +++++++------------ .../local/tumor_only/tumoronly_smallvar.nf | 26 +++++---- workflows/lrsomatic.nf | 18 +++++- 11 files changed, 133 insertions(+), 109 deletions(-) delete mode 100644 modules/nf-core/longphase/haplotag/longphase-haplotag.diff diff --git a/conf/igenomes.config b/conf/igenomes.config index 06bd28a4..d9c42c81 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -43,6 +43,7 @@ params { dbsnp = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/final_dbsnp.vcf.gz" onekgenomes = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/final_1kgenomes.vcf.gz" colors = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/final_colors.vcf.gz" + asap = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/WGS_CHM13_ASAP.vcf.gz" } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" diff --git a/conf/modules.config b/conf/modules.config index 5a3cd9a9..4fa65136 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -405,6 +405,27 @@ process { enabled: false ] } + withName: '.*:GERMLINE_CONSENSUS:BCFTOOLS_SORT' { + ext.prefix = { "${meta.id}_germline_sorted" } + ext.args = {'-Oz -W=tbi'} + publishDir = [ + enabled: false + ] + } + withName: '.*:SOMATIC_CONSENSUS:BCFTOOLS_SORT' { + ext.prefix = { "${meta.id}_somatic_sorted" } + ext.args = {'-Oz -W=tbi'} + publishDir = [ + enabled: false + ] + } + withName: '.*:PAIRED_SMALLVAR_SOMATIC:BCFTOOLS_SORT' { + ext.prefix = { "${meta.id}_somatic_sorted" } + ext.args = {'-Oz -W=tbi'} + publishDir = [ + enabled: false + ] + } withName: '.*:CLAIRSTO' { ext.args = { "--sample_name ${meta.id}" } publishDir = [ diff --git a/conf/test.config b/conf/test.config index 96e61b28..e7014818 100644 --- 
a/conf/test.config +++ b/conf/test.config @@ -52,7 +52,7 @@ params { fasta = "https://raw.githubusercontent.com/IntGenomicsLab/test-datasets/main/references/GRCh38_chr19.fasta.gz" // Additional params - genome = "GRCh38" + genome = "CHM13" vep_genome = "WBcel235" vep_species = "caenorhabditis_elegans" skip_wakhan = true diff --git a/modules/nf-core/longphase/haplotag/longphase-haplotag.diff b/modules/nf-core/longphase/haplotag/longphase-haplotag.diff deleted file mode 100644 index 0d93067d..00000000 --- a/modules/nf-core/longphase/haplotag/longphase-haplotag.diff +++ /dev/null @@ -1,34 +0,0 @@ -Changes in component 'nf-core/longphase/haplotag' -'modules/nf-core/longphase/haplotag/meta.yml' is unchanged -Changes in 'longphase/haplotag/main.nf': ---- modules/nf-core/longphase/haplotag/main.nf -+++ modules/nf-core/longphase/haplotag/main.nf -@@ -42,6 +42,11 @@ - if [ -f "${prefix}.out" ]; then - mv ${prefix}.out ${prefix}.log - fi -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') -+ END_VERSIONS - """ - - stub: -@@ -52,5 +57,10 @@ - """ - touch ${prefix}.${suffix} - ${log} -+ -+ cat <<-END_VERSIONS > versions.yml -+ "${task.process}": -+ longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') -+ END_VERSIONS - """ --} -+} -'modules/nf-core/longphase/haplotag/environment.yml' is unchanged -'modules/nf-core/longphase/haplotag/tests/main.nf.test' is unchanged -'modules/nf-core/longphase/haplotag/tests/main.nf.test.snap' is unchanged -'modules/nf-core/longphase/haplotag/tests/nextflow.config' is unchanged -************************************************************ diff --git a/nextflow.config b/nextflow.config index 3b2b40d4..1efa481d 100644 --- a/nextflow.config +++ b/nextflow.config @@ -14,9 +14,12 @@ params { input = null // Small variant calling options - germline_var_keep = 'consensus' - somatic_var_keep = 'all' - trust_caller = 'deepvariant' + germline_var_keep = 
['deepvariant', 'clair'] + somatic_var_keep = ['deepsomatic', 'clair'] + germline_var_combine = 'all' + somatic_var_combine = 'all' + prioritize_caller_germline = 'deepvariant' + prioritize_caller_somatic = 'deepsomatic' pon_vcfs = null pon_flags = null diff --git a/nextflow_schema.json b/nextflow_schema.json index 89f599c3..a83ea6e8 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -47,23 +47,49 @@ "title": "options for small variant calling", "type": "object", "properties": { - "trust_caller": { - "type": "string", - "description": "specifies which variant caller format to use when both clair and deepvariant call the same variant", - "default": "deepvariant", - "enum": ["deepvariant", "clair"] - }, "germline_var_keep": { - "type": "string", - "description": "specifies which germline variants are used for phasing and annotation. Must be [consensus, all, deepvariant, clair]", - "default": "consensus", - "enum": ["consensus", "all", "deepvariant", "clair"] + "type": "array", + "description": "List of germline variant callers to use. Must include at least one of [deepvariant, clair].", + "default": ["deepvariant", "clair"], + "items": { + "type": "string", + "enum": ["deepvariant", "clair"] + }, + "minItems": 1 }, "somatic_var_keep": { + "type": "array", + "description": "List of somatic variant callers to use. Must include at least one of [deepsomatic, clair].", + "default": ["deepsomatic", "clair"], + "items": { + "type": "string", + "enum": ["deepsomatic", "clair"] + }, + "minItems": 1 + }, + "germline_var_combine": { + "type": "string", + "description": "When two germline callers are used, specifies how to combine them. 'consensus' keeps only variants called by both callers; 'all' keeps all variants from both callers.", + "default": "all", + "enum": ["consensus", "all"] + }, + "somatic_var_combine": { "type": "string", - "description": "specifies which somatic variants are used for phasing and annotation. 
Must be [consensus, all,deepvariant,clair]", + "description": "When two somatic callers are used, specifies how to combine them. 'consensus' keeps only variants called by both callers; 'all' keeps all variants from both callers.", "default": "all", - "enum": ["consensus", "all", "deepvariant", "clair"] + "enum": ["consensus", "all"] + }, + "prioritize_caller_germline": { + "type": "string", + "description": "When both germline callers are used, specifies which caller's format to use for variants called by both. Must be [deepvariant, clair].", + "default": "deepvariant", + "enum": ["deepvariant", "clair"] + }, + "prioritize_caller_somatic": { + "type": "string", + "description": "When both somatic callers are used, specifies which caller's format to use for variants called by both. Must be [deepsomatic, clair].", + "default": "deepsomatic", + "enum": ["deepsomatic", "clair"] } } }, diff --git a/subworkflows/local/paired/paired_smallvar_germline.nf b/subworkflows/local/paired/paired_smallvar_germline.nf index 2e9f286c..a85eb096 100644 --- a/subworkflows/local/paired/paired_smallvar_germline.nf +++ b/subworkflows/local/paired/paired_smallvar_germline.nf @@ -18,7 +18,7 @@ workflow PAIRED_SMALLVAR_GERMLINE { germline_vcf = channel.empty() germline_tbi = channel.empty() // COMBINE NORMAL BAMS WITH DOWNLOADED CLAIR3 MODELS - if(params.germline_var_keep != 'deepvariant') { + if(params.germline_var_keep.contains('clair')) { clair3_models .map{ meta, file -> @@ -65,7 +65,7 @@ workflow PAIRED_SMALLVAR_GERMLINE { .set{clair3_ch} } // DEEPVARIANT - if(params.germline_var_keep != 'clair') { + if(params.germline_var_keep.contains('deepvariant')) { normal_bams .map {meta, bam, bai -> @@ -100,7 +100,7 @@ workflow PAIRED_SMALLVAR_GERMLINE { .set{deepvariant_ch} } // COMBINE GERMLINE VARIATION - if (params.germline_var_keep != 'clair' && params.germline_var_keep != 'deepvariant' ) { + if (params.germline_var_keep.size() > 1) { clair3_ch .mix(deepvariant_ch) .set{combined_germline_ch} 
@@ -109,17 +109,18 @@ workflow PAIRED_SMALLVAR_GERMLINE { combined_germline_ch, fasta, fai, - params.germline_var_keep + params.prioritize_caller_germline, + params.germline_var_combine ) GERMLINE_CONSENSUS.out.vcf .join(GERMLINE_CONSENSUS.out.tbi) .set{ germline_vcf } } - else if (params.germline_var_keep == 'clair') { + else if (params.germline_var_keep == ['clair']) { clair3_ch .set{germline_vcf} } - else if (params.germline_var_keep == 'deepvariant') { + else if (params.germline_var_keep == ['deepvariant']) { deepvariant_ch .set{germline_vcf} } diff --git a/subworkflows/local/paired/paired_smallvar_somatic.nf b/subworkflows/local/paired/paired_smallvar_somatic.nf index 421864bd..37c77d7e 100644 --- a/subworkflows/local/paired/paired_smallvar_somatic.nf +++ b/subworkflows/local/paired/paired_smallvar_somatic.nf @@ -20,7 +20,7 @@ workflow PAIRED_SMALLVAR_SOMATIC { somatic_tbi = channel.empty() // CLAIRS - if(params.somatic_var_keep != 'deepvariant') { + if(params.somatic_var_keep.contains('clair')) { tumor_normal_bams .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> return[meta , tumor_bam, tumor_bai, normal_bam, normal_bai, meta.clairS_model] @@ -57,7 +57,7 @@ workflow PAIRED_SMALLVAR_SOMATIC { } // DEEPSOMATIC - if(params.somatic_var_keep != 'clair') { + if(params.somatic_var_keep.contains('deepsomatic')) { tumor_normal_bams .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> @@ -83,7 +83,7 @@ workflow PAIRED_SMALLVAR_SOMATIC { } // COMBINE GERMLINE VARIATION - if (params.somatic_var_keep != 'clair' && params.somatic_var_keep != 'deepvariant' ) { + if (params.somatic_var_keep.size() > 1) { clairs_ch .mix(deepsomatic_ch) .set{combine_somatic_ch} @@ -92,18 +92,19 @@ workflow PAIRED_SMALLVAR_SOMATIC { combine_somatic_ch, fasta, fai, - params.somatic_var_keep + params.prioritize_caller_somatic, + params.somatic_var_combine ) SOMATIC_CONSENSUS.out.vcf .join(SOMATIC_CONSENSUS.out.tbi) .set{ somatic_vcf } } - else if (params.somatic_var_keep == 
'clair') { + else if (params.somatic_var_keep == ['clair']) { clairs_ch .set{somatic_vcf} } - else if (params.somatic_var_keep == 'deepvariant') { + else if (params.somatic_var_keep == ['deepsomatic']) { deepsomatic_ch .set{somatic_vcf} } diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf index 4665d4bb..cf3b6e86 100644 --- a/subworkflows/local/small_variant_consensus.nf +++ b/subworkflows/local/small_variant_consensus.nf @@ -13,7 +13,8 @@ workflow SMALL_VARIANT_CONSENSUS { mixed_vcfs // [meta: w caller_info,mixed_vcfs, mixed_indicies] fasta fai - var_keep_method + prioritize_caller + combine_method main: //normalize VCFs @@ -94,47 +95,34 @@ workflow SMALL_VARIANT_CONSENSUS { } .set{mixed_vcfs} - if (var_keep_method == 'consensus') { - mixed_vcfs - .map{ meta, vcfs, tbis -> - def file = [] - def target = [] - def regions = [] - return [meta, vcfs, tbis, file, target, regions] - } - .set{isec_input} + mixed_vcfs + .map{ meta, vcfs, tbis -> + def file = [] + def target = [] + def regions = [] + return [meta, vcfs, tbis, file, target, regions] + } + .set{isec_input} - BCFTOOLS_ISEC(isec_input) + BCFTOOLS_ISEC(isec_input) - if (params.trust_caller == 'deepvariant') { + if (combine_method == 'consensus') { + if (prioritize_caller in ['deepvariant', 'deepsomatic']) { BCFTOOLS_ISEC.out.deepvar_consensus_vcf - .set{vcf} + .set{vcf} BCFTOOLS_ISEC.out.deepvar_consensus_tbi - .set{tbi} + .set{tbi} } - if (params.trust_caller == 'clair') { + else if (prioritize_caller == 'clair') { BCFTOOLS_ISEC.out.clair_consensus_vcf - .set{vcf} + .set{vcf} BCFTOOLS_ISEC.out.clair_consensus_tbi - .set{tbi} + .set{tbi} } - } - else if (var_keep_method == 'all'){ - - mixed_vcfs - .map{ meta, vcfs, tbis -> - def file = [] - def target = [] - def regions = [] - return [meta, vcfs, tbis, file, target, regions] - } - .set{isec_input} - - BCFTOOLS_ISEC(isec_input) - - if (params.trust_caller == 'deepvariant') { + else if (combine_method == 
'all') { + if (prioritize_caller in ['deepvariant', 'deepsomatic']) { BCFTOOLS_ISEC.out.deepvar_consensus_vcf .join(BCFTOOLS_ISEC.out.deepvar_consensus_tbi) .join(BCFTOOLS_ISEC.out.clair_private_vcf) @@ -147,8 +135,7 @@ workflow SMALL_VARIANT_CONSENSUS { BCFTOOLS_CONCAT.out.vcf .set{concat_out} } - - else if (params.trust_caller == 'clair') { + else if (prioritize_caller == 'clair') { BCFTOOLS_ISEC.out.deepvar_private_vcf .join(BCFTOOLS_ISEC.out.deepvar_private_tbi) .join(BCFTOOLS_ISEC.out.clair_consensus_vcf) diff --git a/subworkflows/local/tumor_only/tumoronly_smallvar.nf b/subworkflows/local/tumor_only/tumoronly_smallvar.nf index 2dab2726..4603663a 100644 --- a/subworkflows/local/tumor_only/tumoronly_smallvar.nf +++ b/subworkflows/local/tumor_only/tumoronly_smallvar.nf @@ -29,7 +29,7 @@ workflow TUMORONLY_SMALLVAR { // CLAIRS-TO (SOMATIC/NONGERMLINE VARIANT CALLING) - if(params.somatic_var_keep != 'deepvariant') { + if(params.somatic_var_keep.contains('clair') || params.germline_var_keep.contains('clair')) { tumor_bams .map { meta, bam, bai -> return [ meta, bam, bai, meta.clairSTO_model] @@ -70,7 +70,7 @@ workflow TUMORONLY_SMALLVAR { .set{clairsto_somatic_ch} } // DEEPVARIANT - if(params.somatic_var_keep != 'clair') { + if(params.germline_var_keep.contains('deepvariant')) { tumor_bams .map { meta, bam, bai -> def intervals = [] @@ -97,7 +97,7 @@ workflow TUMORONLY_SMALLVAR { } // COMBINE GERMLINE VARIANTS - if (params.germline_var_keep != 'clair' && params.germline_var_keep != 'deepvariant' ) { + if (params.germline_var_keep.size() > 1) { clairsto_germline_ch .mix(deepvariant_ch) .set{combined_germline_ch} @@ -106,22 +106,23 @@ workflow TUMORONLY_SMALLVAR { combined_germline_ch, fasta, fai, - params.germline_var_keep + params.prioritize_caller_germline, + params.germline_var_combine ) GERMLINE_CONSENSUS.out.vcf .join(GERMLINE_CONSENSUS.out.tbi) .set{germline_vcf} } - else if (params.germline_var_keep == 'clair') { + else if (params.germline_var_keep == 
['clair']) { clairsto_germline_ch .set{germline_vcf} } - else if (params.germline_var_keep == 'deepvariant') { + else if (params.germline_var_keep == ['deepvariant']) { deepvariant_ch .set{germline_vcf} } // DEEPSOMATIC - if(params.somatic_var_keep != 'clair') { + if(params.somatic_var_keep.contains('deepsomatic')) { tumor_bams .map { meta, tumor_bam, tumor_bai -> def normal_bam = [] @@ -146,7 +147,7 @@ workflow TUMORONLY_SMALLVAR { .set{deepsomatic_ch} } // COMBINE SOMATIC VARIATION - if (params.somatic_var_keep != 'clair' && params.somatic_var_keep != 'deepvariant' ) { + if (params.somatic_var_keep.size() > 1) { clairsto_somatic_ch .mix(deepsomatic_ch) .set{combined_somatic_ch} @@ -155,18 +156,19 @@ workflow TUMORONLY_SMALLVAR { combined_somatic_ch, fasta, fai, - params.somatic_var_keep + params.prioritize_caller_somatic, + params.somatic_var_combine ) SOMATIC_CONSENSUS.out.vcf .join(SOMATIC_CONSENSUS.out.tbi) .set{somatic_vcf} } - else if (params.somatic_var_keep == 'clair') { + else if (params.somatic_var_keep == ['clair']) { clairsto_somatic_ch .set{somatic_vcf} } - else if (params.somatic_var_keep == 'deepvariant') { - deepvariant_ch + else if (params.somatic_var_keep == ['deepsomatic']) { + deepsomatic_ch .set{somatic_vcf} } diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index 40f3c9bc..5a4e1a5e 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -100,7 +100,7 @@ workflow LRSOMATIC { pon_files = params.pon_vcfs.collect { file(it) } pon_flags = params.pon_flags } - else { + else if (params.genome == 'GRCh38') { pon_files = [ getGenomeAttribute('gnomad'), getGenomeAttribute('dbsnp'), @@ -114,6 +114,22 @@ workflow LRSOMATIC { "False" ] } + else if (params.genome == 'CHM13') { + pon_files = [ + getGenomeAttribute('gnomad'), + getGenomeAttribute('dbsnp'), + getGenomeAttribute('onekgenomes'), + getGenomeAttribute('colors'), + getGenomeAttribute('asap') + ] + pon_flags = [ + "True", + "True", + "False", + "False", + "False" + ] + } if 
(pon_files.size() != pon_flags.size()) { error "PoN VCFs and allele flags must have same length" } From 4834e8c96ffab5c6e36d0a97b8d5b331eec2a655 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Mon, 30 Mar 2026 11:00:33 +0200 Subject: [PATCH 34/36] updates to documentation/process labels --- conf/base.config | 18 +- modules/local/clair3/main.nf | 3 +- .../local/deepsomatic/callvariants/main.nf | 3 +- modules/local/fibertoolsrs/fire/main.nf | 3 +- .../local/fibertoolsrs/nucleosomes/main.nf | 3 +- modules/local/fibertoolsrs/predictm6a/main.nf | 3 +- modules/local/longphase/modcall/main.nf | 4 +- .../nf-core/deepvariant/callvariants/main.nf | 3 +- .../nf-core/deepvariant/makeexamples/main.nf | 2 +- .../deepvariant/postprocessvariants/main.nf | 2 +- nextflow.config | 1 + nextflow_schema.json | 59 +++- subworkflows/local/deepsomatic.nf | 48 ++- .../local/paired/paired_smallvar_germline.nf | 54 +++- .../local/paired/paired_smallvar_somatic.nf | 66 +++- subworkflows/local/phasing_haplotyping.nf | 136 ++++++-- subworkflows/local/prepare_annotation.nf | 25 +- subworkflows/local/prepare_reference_files.nf | 86 +++-- subworkflows/local/small_variant_consensus.nf | 94 ++++-- .../local/tumor_only/tumoronly_smallvar.nf | 89 ++++- .../utils_nfcore_lrsomatic_pipeline/main.nf | 32 +- tests/default.nf.test.snap | 61 ++-- workflows/lrsomatic.nf | 305 +++++++++++++----- 23 files changed, 840 insertions(+), 260 deletions(-) diff --git a/conf/base.config b/conf/base.config index cd23b577..08378854 100644 --- a/conf/base.config +++ b/conf/base.config @@ -26,9 +26,25 @@ process { // adding in your local modules too. // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors - withLabel: process_gpu { + withLabel:process_gpu_high { ext.use_gpu = { params.use_gpu as boolean } accelerator = { (params.use_gpu as boolean) ? 
1 : null } + cpus = { 8 * task.attempt } + memory = { 48.GB * task.attempt } + time = { 16.h * task.attempt } + } + withLabel:process_gpu_very_high { + ext.use_gpu = { params.use_gpu as boolean } + accelerator = { (params.use_gpu as boolean) ? 1 : null } + cpus = { 16 * task.attempt } + memory = { 96.GB * task.attempt } + time = { 16.h * task.attempt } + } + withLabel:process_gpu_very_high_memory { + ext.use_gpu = { params.use_gpu as boolean } + accelerator = { (params.use_gpu as boolean) ? 1 : null } + cpus = { 16 * task.attempt } + memory = { 128.GB * task.attempt } } withLabel:process_single { cpus = { 1 } diff --git a/modules/local/clair3/main.nf b/modules/local/clair3/main.nf index f53b8b58..44479086 100644 --- a/modules/local/clair3/main.nf +++ b/modules/local/clair3/main.nf @@ -1,7 +1,6 @@ process CLAIR3 { tag "$meta.id" - label 'process_very_high' - label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" + label "${params.use_gpu ? 'process_gpu_very_high' : 'process_very_high'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/deepsomatic/callvariants/main.nf b/modules/local/deepsomatic/callvariants/main.nf index 10cdb40b..afeb33e1 100644 --- a/modules/local/deepsomatic/callvariants/main.nf +++ b/modules/local/deepsomatic/callvariants/main.nf @@ -1,7 +1,6 @@ process DEEPSOMATIC_CALLVARIANTS { tag "$meta.id" - label 'process_high' - label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" + label "${params.use_gpu ? 
'process_gpu_high' : 'process_high'}" //Conda is not supported at the moment container "docker.io/google/deepsomatic:1.7.0" diff --git a/modules/local/fibertoolsrs/fire/main.nf b/modules/local/fibertoolsrs/fire/main.nf index e78bf544..eed76d97 100644 --- a/modules/local/fibertoolsrs/fire/main.nf +++ b/modules/local/fibertoolsrs/fire/main.nf @@ -1,8 +1,7 @@ process FIBERTOOLSRS_FIRE { tag "$meta.id" label 'process_very_high' - label 'process_high_memory' - label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" + label "${params.use_gpu ? 'process_gpu_very_high_memory' : 'process_high_memory'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/fibertoolsrs/nucleosomes/main.nf b/modules/local/fibertoolsrs/nucleosomes/main.nf index 2357d638..db42d106 100644 --- a/modules/local/fibertoolsrs/nucleosomes/main.nf +++ b/modules/local/fibertoolsrs/nucleosomes/main.nf @@ -1,8 +1,7 @@ process FIBERTOOLSRS_NUCLEOSOMES { tag "$meta.id" label 'process_very_high' - label 'process_high_memory' - label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" + label "${params.use_gpu ? 'process_gpu_very_high_memory' : 'process_high_memory'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/fibertoolsrs/predictm6a/main.nf b/modules/local/fibertoolsrs/predictm6a/main.nf index 0ac25676..bb355bfe 100644 --- a/modules/local/fibertoolsrs/predictm6a/main.nf +++ b/modules/local/fibertoolsrs/predictm6a/main.nf @@ -1,8 +1,7 @@ process FIBERTOOLSRS_PREDICTM6A { tag "$meta.id" label 'process_very_high' - label 'process_high_memory' - label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" + label "${params.use_gpu ? 
'process_gpu_very_high_memory' : 'process_high_memory'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/longphase/modcall/main.nf b/modules/local/longphase/modcall/main.nf index a49ffa6d..45880aba 100644 --- a/modules/local/longphase/modcall/main.nf +++ b/modules/local/longphase/modcall/main.nf @@ -1,6 +1,6 @@ process LONGPHASE_MODCALL { tag "$meta.id" - label 'process_very_high' + label 'process_high' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? @@ -29,7 +29,7 @@ process LONGPHASE_MODCALL { longphase \\ modcall \\ $args \\ - --threads $task.cpus \\ + --threads 1 \\ -o ${prefix} \\ --reference ${fasta} \\ -b ${bam} \\ diff --git a/modules/nf-core/deepvariant/callvariants/main.nf b/modules/nf-core/deepvariant/callvariants/main.nf index d9218062..e0b24884 100644 --- a/modules/nf-core/deepvariant/callvariants/main.nf +++ b/modules/nf-core/deepvariant/callvariants/main.nf @@ -1,8 +1,7 @@ process DEEPVARIANT_CALLVARIANTS { tag "$meta.id" - label 'process_high' - label "${params.use_gpu ? 'process_gpu' : 'process_noaccel'}" + label "${params.use_gpu ? 
'process_gpu_very_high' : 'process_very_high'}" //Conda is not supported at the moment container "docker.io/google/deepvariant:1.9.0" diff --git a/modules/nf-core/deepvariant/makeexamples/main.nf b/modules/nf-core/deepvariant/makeexamples/main.nf index d553e980..05bd5a93 100644 --- a/modules/nf-core/deepvariant/makeexamples/main.nf +++ b/modules/nf-core/deepvariant/makeexamples/main.nf @@ -1,6 +1,6 @@ process DEEPVARIANT_MAKEEXAMPLES { tag "$meta.id" - label 'process_high' + label 'process_very_high' //Conda is not supported at the moment container "docker.io/google/deepvariant:1.9.0" diff --git a/modules/nf-core/deepvariant/postprocessvariants/main.nf b/modules/nf-core/deepvariant/postprocessvariants/main.nf index 2a7e8fb6..dd949901 100644 --- a/modules/nf-core/deepvariant/postprocessvariants/main.nf +++ b/modules/nf-core/deepvariant/postprocessvariants/main.nf @@ -1,6 +1,6 @@ process DEEPVARIANT_POSTPROCESSVARIANTS { tag "$meta.id" - label 'process_medium' + label 'process_high' //Conda is not supported at the moment container "docker.io/google/deepvariant:1.9.0" diff --git a/nextflow.config b/nextflow.config index 1efa481d..64cf5653 100644 --- a/nextflow.config +++ b/nextflow.config @@ -21,6 +21,7 @@ params { prioritize_caller_germline = 'deepvariant' prioritize_caller_somatic = 'deepsomatic' + // PON Options pon_vcfs = null pon_flags = null diff --git a/nextflow_schema.json b/nextflow_schema.json index a83ea6e8..eceb2aaf 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -43,6 +43,22 @@ } } }, + "pon_options": { + "title": "Panel of Normals (PON) options", + "type": "object", + "description": "Options for panel of normals filtering", + "default": "", + "properties": { + "pon_vcfs": { + "type": "string", + "description": "Path to panel of normals VCF file(s) for somatic variant filtering" + }, + "pon_flags": { + "type": "string", + "description": "Allele flags ('True' or 'False'), one per panel of normals VCF in pon_vcfs; both must have the same number of entries" + } + } + }, "small_variant_calling_options": { 
"title": "options for small variant calling", "type": "object", @@ -108,6 +124,7 @@ }, "igenomes_ignore": { "type": "boolean", + "default": false, "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, @@ -182,7 +199,9 @@ "default": 113 }, "download_vep_cache": { - "type": "boolean" + "type": "boolean", + "default": false, + "description": "Download the VEP cache if not already present" }, "vep_custom": { "type": "string" @@ -262,6 +281,7 @@ }, "ascat_pdf_plots": { "type": "boolean", + "default": false, "description": "Boolean for ASCAT production of pdf plots (entered as string)" } } @@ -285,49 +305,73 @@ "properties": { "skip_qc": { "type": "boolean", + "default": false, "description": "Skips all QC steps" }, "skip_cramino": { "type": "boolean", + "default": false, "description": "Skips Cramino" }, "skip_mosdepth": { "type": "boolean", + "default": false, "description": "Skips Mosdepth" }, "skip_bamstats": { "type": "boolean", + "default": false, "description": "Skips samtools flagstat, stats, and idxstats" }, "skip_wakhan": { "type": "boolean", + "default": false, "description": "Skips wakhan" }, "skip_fiber": { "type": "boolean", + "default": false, "description": "Skip Fibertools steps" }, "skip_ascat": { "type": "boolean", + "default": false, "description": "Skip ASCAT" }, "skip_m6a": { "type": "boolean", + "default": false, "description": "Skip m6a calling by Fibertools" }, "skip_vep": { - "type": "boolean" + "type": "boolean", + "default": false, + "description": "Skip VEP annotation" }, "skip_normalfiber": { - "type": "boolean" + "type": "boolean", + "default": false, + "description": "Skip Fibertools steps for the normal sample" }, "skip_nanoplot": { "type": "boolean", + "default": false, "description": "Skip Nanoplot" }, "skip_whatshapstats": { "type": "boolean", + "default": false, "description": "Skip WhatsHap stats" + }, + "skip_modcall": { + "type": "boolean", + "default": false, + "description": "Skip 
modification calling" + }, + "use_gpu": { + "type": "boolean", + "default": false, + "description": "Use GPU for supported tools (e.g. DeepVariant, DeepSomatic)" } } }, @@ -388,6 +432,7 @@ "properties": { "version": { "type": "boolean", + "default": false, "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", "hidden": true @@ -411,6 +456,7 @@ }, "plaintext_email": { "type": "boolean", + "default": false, "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", "hidden": true @@ -425,6 +471,7 @@ }, "monochrome_logs": { "type": "boolean", + "default": false, "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", "hidden": true @@ -476,14 +523,17 @@ }, "help": { "type": ["boolean", "string"], + "default": false, "description": "Display the help message." }, "help_full": { "type": "boolean", + "default": false, "description": "Display the full detailed help message." }, "show_hidden": { "type": "boolean", + "default": false, "description": "Display hidden parameters in the help message (only works when --help or --help_full are provided)." 
} } @@ -493,6 +543,9 @@ { "$ref": "#/$defs/input_output_options" }, + { + "$ref": "#/$defs/pon_options" + }, { "$ref": "#/$defs/small_variant_calling_options" }, diff --git a/subworkflows/local/deepsomatic.nf b/subworkflows/local/deepsomatic.nf index c91ca6af..d1baf584 100644 --- a/subworkflows/local/deepsomatic.nf +++ b/subworkflows/local/deepsomatic.nf @@ -4,29 +4,49 @@ include { DEEPSOMATIC_POSTPROCESSVARIANTS } from '../../modules/local/deepsomati workflow DEEPSOMATIC { take: - ch_input // channel: [ val(meta), path(normal), path(normal_index), path(tumor), path(tumor_index)] - ch_intervals - ch_fasta // channel: [ val(meta2), path(fasta) ] - ch_fai // channel: [ val(meta3), path(fai) ] - ch_gzi // channel: [ val(meta4), path(gzi) ] + ch_input // [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + // normal_bam/bai may be [] for tumor-only mode + ch_intervals // [[:], []] -- empty intervals (genome-wide calling) + ch_fasta // [[:], fasta] + ch_fai // [[:], fai] + ch_gzi // [[:], gzi] -- bgzipped FASTA index (empty if FASTA is not bgzipped) main: + // + // MODULE: DEEPSOMATIC_MAKEEXAMPLES (label: process_high) + // Input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + // Output: .examples -- [meta, [tfrecord shards...]] -- serialised pileup examples + // .gvcf -- [meta, [gvcf tfrecord shards...]] + // DEEPSOMATIC_MAKEEXAMPLES(ch_input, ch_fasta, ch_fai, ch_gzi) + // + // MODULE: DEEPSOMATIC_CALLVARIANTS (label: process_gpu / process_high) + // Input: DEEPSOMATIC_MAKEEXAMPLES.out.examples -- [meta, [tfrecord shards...]] + // Output: .call_variants_tfrecords -- [meta, tfrecord] -- DNN variant call records + // DEEPSOMATIC_CALLVARIANTS(DEEPSOMATIC_MAKEEXAMPLES.out.examples) - // Input to postprocessing step needs both the gvcfs from MAKEEXAMPLES and the variant - // calls from CALLVARIANTS. Joining on meta, which is assumed to be unique. 
- - + // Join CALLVARIANTS output with MAKEEXAMPLES gVCF records (both keyed on meta) + // The postprocessing step needs both the DNN calls and the gVCF pileup records ch_postproc_input = DEEPSOMATIC_CALLVARIANTS.out.call_variants_tfrecords.join( DEEPSOMATIC_MAKEEXAMPLES.out.gvcf, failOnMismatch: true ).map { meta, call_tfrecord, gvcf_tfrecords -> [meta, call_tfrecord, gvcf_tfrecords, [], []] } - + // ch_postproc_input: [meta, call_tfrecord, [gvcf_tfrecords...], [], []] + // trailing [] are for optional candidate positions and haplotype outputs (unused) + + // + // MODULE: DEEPSOMATIC_POSTPROCESSVARIANTS (label: process_medium) + // Input: [meta, call_tfrecord, [gvcf_tfrecords...], [], []] + // Output: .vcf -- [meta, vcf] -- somatic variant calls (VCF) + // .vcf_index -- [meta, tbi] + // .gvcf -- [meta, gvcf] -- genome VCF (all sites) + // .gvcf_index-- [meta, tbi] + // DEEPSOMATIC_POSTPROCESSVARIANTS( ch_postproc_input, ch_fasta, @@ -35,8 +55,8 @@ workflow DEEPSOMATIC { ) emit: - vcf = DEEPSOMATIC_POSTPROCESSVARIANTS.out.vcf - vcf_index = DEEPSOMATIC_POSTPROCESSVARIANTS.out.vcf_index - gvcf = DEEPSOMATIC_POSTPROCESSVARIANTS.out.gvcf - gvcf_index = DEEPSOMATIC_POSTPROCESSVARIANTS.out.gvcf_index + vcf = DEEPSOMATIC_POSTPROCESSVARIANTS.out.vcf // [meta, vcf] + vcf_index = DEEPSOMATIC_POSTPROCESSVARIANTS.out.vcf_index // [meta, tbi] + gvcf = DEEPSOMATIC_POSTPROCESSVARIANTS.out.gvcf // [meta, gvcf] + gvcf_index = DEEPSOMATIC_POSTPROCESSVARIANTS.out.gvcf_index // [meta, tbi] } diff --git a/subworkflows/local/paired/paired_smallvar_germline.nf b/subworkflows/local/paired/paired_smallvar_germline.nf index a85eb096..2ded1cf6 100644 --- a/subworkflows/local/paired/paired_smallvar_germline.nf +++ b/subworkflows/local/paired/paired_smallvar_germline.nf @@ -8,24 +8,30 @@ include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../../subwo workflow PAIRED_SMALLVAR_GERMLINE { take: - normal_bams // [ meta, normal_bam, normal_bai ] - fasta - fai - clair3_models + 
normal_bams // [meta, normal_bam, normal_bai] -- normal sample BAMs from T/N pairs + fasta // [[:], fasta] + fai // [[:], fai] + clair3_models // [meta(id=model_name), model_dir] -- downloaded Clair3 model directories main: ch_versions = channel.empty() germline_vcf = channel.empty() germline_tbi = channel.empty() + // COMBINE NORMAL BAMS WITH DOWNLOADED CLAIR3 MODELS + // Clair3 requires the model directory path; models are keyed by model name (meta.id) if(params.germline_var_keep.contains('clair')) { + // Extract model name from meta.id for combine-by key clair3_models .map{ meta, file -> def clair3_model_name = meta.id return [meta, clair3_model_name, file] } .set{clair3_models} + // clair3_models: [meta(id=model_name), model_name_str, model_dir] + + // Emit [meta, clair3_model_name, bam, bai] to use model_name as the combine key normal_bams .map{ meta, bam, bai -> def new_meta = meta.subMap('id', @@ -40,15 +46,25 @@ workflow PAIRED_SMALLVAR_GERMLINE { return [ new_meta, meta.clair3_model, bam, bai ] } .set { normal_bams_model } + // normal_bams_model: [meta, clair3_model_name, bam, bai] + // clair3_model_name is the join key used by .combine(clair3_models, by:1) - // CLAIR3 + // + // MODULE: CLAIR3 (label: process_high) + // Input: [meta, bam, bai, model_dir, platform_str] + // fasta / fai + // Output: .vcf -- [meta, vcf] -- germline SNVs/indels + // .tbi -- [meta, tbi] + // normal_bams_model - .combine(clair3_models,by:1) + .combine(clair3_models,by:1) // join on clair3_model_name .map {_clair3_model, meta_bam, bam, bai, _meta_model, model -> def platform = (meta_bam.platform == 'pb') ? 'hifi' : meta_bam.platform return [meta_bam, bam, bai, model, platform] } .set{ clair3_input_ch } + // clair3_input_ch: [meta, bam, bai, model_dir, platform_str] + // platform_str: 'hifi' for PacBio ('pb' → 'hifi'), otherwise meta.platform (e.g. 
'ont') CLAIR3 ( clair3_input_ch, @@ -63,10 +79,20 @@ workflow PAIRED_SMALLVAR_GERMLINE { return [new_meta, vcf, tbi] } .set{clair3_ch} + // clair3_ch: [meta(+caller:'clair3'), vcf, tbi] } + // DEEPVARIANT if(params.germline_var_keep.contains('deepvariant')) { + // + // SUBWORKFLOW: DEEPVARIANT (nf-core) + // Input: [meta, bam, bai, []] -- [] is empty intervals (genome-wide) + // fasta / fai + // [[:],[]] x2 -- empty GZI index and PAR-regions BED (both optional, unused here) + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // normal_bams .map {meta, bam, bai -> def new_meta = meta.subMap('id', @@ -82,13 +108,14 @@ workflow PAIRED_SMALLVAR_GERMLINE { return [new_meta, bam, bai, intervals] } .set{deepvariant_input_ch} + // deepvariant_input_ch: [meta, bam, bai, []] DEEPVARIANT ( deepvariant_input_ch, fasta, fai, - [[:],[]], - [[:],[]] + [[:],[]], // GZI index (only needed for bgzipped FASTA; empty here) + [[:],[]] // PAR regions BED (not used) ) DEEPVARIANT.out.vcf @@ -98,13 +125,20 @@ workflow PAIRED_SMALLVAR_GERMLINE { return [new_meta, vcf, tbi] } .set{deepvariant_ch} + // deepvariant_ch: [meta(+caller:'deepvariant'), vcf, tbi] } + // COMBINE GERMLINE VARIATION + // If both callers requested: run consensus subworkflow; otherwise pass through single-caller output if (params.germline_var_keep.size() > 1) { + // Mix both caller VCFs into a single channel for GERMLINE_CONSENSUS clair3_ch .mix(deepvariant_ch) .set{combined_germline_ch} + // combined_germline_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + // SUBWORKFLOW: GERMLINE_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) + // Normalise, annotate with caller ID, intersect, and combine per params GERMLINE_CONSENSUS( combined_germline_ch, fasta, @@ -115,6 +149,7 @@ workflow PAIRED_SMALLVAR_GERMLINE { GERMLINE_CONSENSUS.out.vcf .join(GERMLINE_CONSENSUS.out.tbi) .set{ germline_vcf } + // germline_vcf: [meta(+caller from consensus), vcf, tbi] } else if (params.germline_var_keep == ['clair']) { clair3_ch @@ -125,6 +160,7 @@ workflow
PAIRED_SMALLVAR_GERMLINE { .set{germline_vcf} } + // Strip 'caller' field from final germline VCF meta (not needed downstream) germline_vcf .map{ meta, vcf, tbi -> def new_meta = meta.subMap('id', @@ -141,5 +177,5 @@ workflow PAIRED_SMALLVAR_GERMLINE { .set{germline_vcf} emit: - germline_vcf + germline_vcf // [meta, vcf, tbi] -- final germline VCF (Clair3, DeepVariant, or consensus) } diff --git a/subworkflows/local/paired/paired_smallvar_somatic.nf b/subworkflows/local/paired/paired_smallvar_somatic.nf index 37c77d7e..c19553a9 100644 --- a/subworkflows/local/paired/paired_smallvar_somatic.nf +++ b/subworkflows/local/paired/paired_smallvar_somatic.nf @@ -10,23 +10,32 @@ include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../../subwor workflow PAIRED_SMALLVAR_SOMATIC { take: - tumor_normal_bams // [ meta, tumor_bam, tumor_bai, normal_hapbam, normal_bai ] - fasta - fai + tumor_normal_bams // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + fasta // [[:], fasta] + fai // [[:], fai] main: ch_versions = channel.empty() somatic_vcf = channel.empty() somatic_tbi = channel.empty() - // CLAIRS + // CLAIRS: somatic SNV/indel calling from T/N paired BAMs if(params.somatic_var_keep.contains('clair')) { + // Append ClairS model name (from meta) as the last element for CLAIRS module tumor_normal_bams .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> return[meta , tumor_bam, tumor_bai, normal_bam, normal_bai, meta.clairS_model] } .set { clairs_input } - + // clairs_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, clairS_model_str] + + // + // MODULE: CLAIRS (label: process_high) + // Input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, model_str] + // fasta / fai + // Output: .vcfs -- [meta, [snv_vcf, indel_vcf]] -- separate SNV and indel VCFs + // .tbi -- [meta, [snv_tbi, indel_tbi]] + // CLAIRS ( clairs_input, fasta, @@ -34,15 +43,27 @@ workflow PAIRED_SMALLVAR_SOMATIC { ) // CONCAT CLAIRS INDEL AND SNV OUTPUT - + // ClairS 
outputs separate SNV and indel VCFs; merge into a single sorted VCF CLAIRS.out.vcfs .join(CLAIRS.out.tbi) .set{clairs_out} + // clairs_out: [meta, [snv_vcf, indel_vcf], [snv_tbi, indel_tbi]] + // + // MODULE: BCFTOOLS_CONCAT (label: process_medium) + // Input: [meta, [vcf...], [tbi...]] + // Output: .vcf -- [meta, vcf] -- unsorted concatenated SNV+indel VCF + // BCFTOOLS_CONCAT ( clairs_out ) + // + // MODULE: BCFTOOLS_SORT (label: process_medium) + // Input: [meta, vcf] + // Output: .vcf -- [meta, vcf] -- coordinate-sorted VCF + // .tbi -- [meta, tbi] + // BCFTOOLS_SORT ( BCFTOOLS_CONCAT.out.vcf ) @@ -54,23 +75,34 @@ workflow PAIRED_SMALLVAR_SOMATIC { return [new_meta, vcf, tbi] } .set{clairs_ch} + // clairs_ch: [meta(+caller:'clairs'), vcf, tbi] -- merged and sorted ClairS somatic VCF } - // DEEPSOMATIC + // DEEPSOMATIC: somatic variant calling using deep learning T/N model if(params.somatic_var_keep.contains('deepsomatic')) { + // DeepSomatic expects [normal, tumor] order (opposite of input tuple) tumor_normal_bams .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai -> return [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] } .set{ deepsomatic_input } - + // deepsomatic_input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + + // + // SUBWORKFLOW: DEEPSOMATIC (local) + // Input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + // [[:],[]] -- empty intervals + // fasta / fai / [[:],[]] -- empty GZI + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // DEEPSOMATIC ( deepsomatic_input, - [[:],[]], + [[:],[]], // intervals (empty = genome-wide) fasta, fai, - [[:],[]] + [[:],[]] // GZI (empty if FASTA is uncompressed) ) DEEPSOMATIC.out.vcf @@ -80,14 +112,18 @@ workflow PAIRED_SMALLVAR_SOMATIC { return [new_meta, vcf, tbi] } .set{deepsomatic_ch} - + // deepsomatic_ch: [meta(+caller:'deepsomatic'), vcf, tbi] } - // COMBINE GERMLINE VARIATION + + // COMBINE SOMATIC VARIATION + // If both callers requested: run consensus subworkflow; 
otherwise pass through single-caller output if (params.somatic_var_keep.size() > 1) { clairs_ch .mix(deepsomatic_ch) .set{combine_somatic_ch} + // combine_somatic_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + // SUBWORKFLOW: SOMATIC_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) SOMATIC_CONSENSUS( combine_somatic_ch, fasta, @@ -99,6 +135,7 @@ workflow PAIRED_SMALLVAR_SOMATIC { SOMATIC_CONSENSUS.out.vcf .join(SOMATIC_CONSENSUS.out.tbi) .set{ somatic_vcf } + // somatic_vcf: [meta(+caller from consensus), vcf, tbi] } else if (params.somatic_var_keep == ['clair']) { clairs_ch @@ -108,6 +145,8 @@ workflow PAIRED_SMALLVAR_SOMATIC { deepsomatic_ch .set{somatic_vcf} } + + // Strip 'caller' from meta before emitting somatic_vcf .map{ meta, vcf, tbi -> def new_meta = meta.subMap('id', @@ -122,6 +161,7 @@ workflow PAIRED_SMALLVAR_SOMATIC { return[new_meta, vcf, tbi] } .set{somatic_vcf} + emit: - somatic_vcf + somatic_vcf // [meta, vcf, tbi] -- final somatic VCF (ClairS, DeepSomatic, or consensus) } diff --git a/subworkflows/local/phasing_haplotyping.nf b/subworkflows/local/phasing_haplotyping.nf index 935be201..e0aaf290 100644 --- a/subworkflows/local/phasing_haplotyping.nf +++ b/subworkflows/local/phasing_haplotyping.nf @@ -11,25 +11,29 @@ include { BCFTOOLS_SORT } from '../../module workflow PHASING_HAPLOTYPING { take: - tumor_normal_bams // [meta, bam, bai] - germline_vcf - somatic_vcf - fasta - fai + tumor_normal_bams // [meta, bam, bai] -- all samples: tumor, normal, and tumor-only + germline_vcf // [meta, vcf, tbi] -- germline small variants (from PAIRED_SMALLVAR_GERMLINE or TUMORONLY_SMALLVAR) + somatic_vcf // [meta, vcf, tbi] -- somatic small variants (from PAIRED_SMALLVAR_SOMATIC or TUMORONLY_SMALLVAR) + fasta // [[:], fasta] + fai // [[:], fai] main: // SPLIT INTO PAIRED AND TUMOR ONLY + // paired_data is set to the matched sample ID for paired samples, null/false for tumor-only tumor_normal_bams .branch { meta, _bams, _bai -> paired: meta.paired_data 
tumor_only: !meta.paired_data } .set { branched_bams } + // branched_bams.paired: [meta, bam, bai] -- tumor + normal from paired runs + // branched_bams.tumor_only: [meta, bam, bai] -- tumor-only samples branched_bams.paired .set{ paired_ch } + // Strip 'type' from tumor-only meta (no type distinction needed in this stream) branched_bams.tumor_only .map { meta, bam, bai -> def new_meta = meta.subMap('id', @@ -44,14 +48,19 @@ workflow PHASING_HAPLOTYPING { return [ new_meta, bam, bai ] } .set{ tumor_only_ch } + // tumor_only_ch: [meta (no type), bam, bai] + // Split paired samples into normal and tumor streams for separate handling paired_ch .branch { meta, _bam, _bai -> normal: meta.type == "normal" tumor: meta.type == "tumor" } .set {paired_ch_branched} + // paired_ch_branched.normal: [meta, bam, bai] -- normal BAMs from T/N pairs + // paired_ch_branched.tumor: [meta, bam, bai] -- tumor BAMs from T/N pairs + // Strip 'type' from paired normal/tumor meta to allow joining with tumor-only channel paired_ch_branched.normal .map { meta, bam, bai -> def new_meta = meta.subMap('id', @@ -66,6 +75,7 @@ workflow PHASING_HAPLOTYPING { return [ new_meta, bam, bai ] } .set{ paired_normal_ch } + // paired_normal_ch: [meta (no type), bam, bai] paired_ch_branched.tumor .map { meta, bam, bai -> @@ -81,24 +91,45 @@ workflow PHASING_HAPLOTYPING { return [ new_meta, bam, bai ] } .set{ paired_tumor_ch } + // paired_tumor_ch: [meta (no type), bam, bai] + // Germline phasing uses normal BAMs (+ tumor-only BAMs used as their own "normal" proxy) tumor_only_ch .mix(paired_normal_ch) .set { normal_bams_w_tumoronly_ch } + // normal_bams_w_tumoronly_ch: [meta, bam, bai] + // -- normal BAMs from T/N pairs + tumor-only BAMs (both phased with germline VCF) + + // Somatic phasing uses tumor BAMs (+ tumor-only BAMs) tumor_only_ch .mix(paired_tumor_ch) .set{ tumor_bams_ch} + // tumor_bams_ch: [meta, bam, bai] -- tumor BAMs from T/N pairs + tumor-only BAMs - // MODCALL + // MODCALL: detect base 
modifications (e.g. 5mC) from aligned BAMs using Longphase + // Results are used as additional evidence during phasing if (!params.skip_modcall) { + // + // MODULE: LONGPHASE_MODCALL_GERMLINE (label: process_high) + // Input: [meta, bam, bai] -- normal BAMs (+ tumor-only BAMs) + // fasta / fai + // Output: .mod_vcf -- [meta, vcf] -- base modification calls (e.g. CpG methylation) + // LONGPHASE_MODCALL_GERMLINE ( normal_bams_w_tumoronly_ch, fasta, fai ) + // + // MODULE: LONGPHASE_MODCALL_SOMATIC (label: process_high) + // Input: [meta, bam, bai] -- tumor BAMs (+ tumor-only BAMs) + // fasta / fai + // Output: .mod_vcf -- [meta, vcf] -- base modification calls for tumor + // + LONGPHASE_MODCALL_SOMATIC ( tumor_bams_ch, fasta, @@ -106,31 +137,54 @@ workflow PHASING_HAPLOTYPING { ) } + + // Merge germline and somatic VCFs into a single file for somatic phasing + // Longphase requires all variant sites in one VCF to produce a consistent phase block germline_vcf .join(somatic_vcf) .map { meta, germline_vcf, germline_tbi, somatic_vcf, somatic_tbi -> - def vcfs = [somatic_vcf, germline_vcf] + def vcfs = [somatic_vcf, germline_vcf] // somatic first (higher priority in phasing) def tbis = [somatic_tbi, germline_tbi] return [ meta, vcfs, tbis] } .set{germline_somatic_vcfs} + // germline_somatic_vcfs (pre-concat): [meta, [somatic_vcf, germline_vcf], [somatic_tbi, germline_tbi]] + + // + // MODULE: BCFTOOLS_CONCAT (label: process_medium) + // Input: [meta, [vcfs...], [tbis...]] -- somatic + germline VCFs to concatenate + // Output: .vcf -- [meta, vcf] -- unsorted concatenated VCF + // BCFTOOLS_CONCAT(germline_somatic_vcfs) BCFTOOLS_CONCAT.out.vcf .set{concat_out} + // concat_out: [meta, vcf] -- concatenated (unsorted) somatic+germline VCF + + // + // MODULE: BCFTOOLS_SORT (label: process_medium) + // Input: [meta, vcf] -- unsorted concatenated VCF + // Output: .vcf -- [meta, vcf] -- coordinate-sorted VCF + // .tbi -- [meta, tbi] + // BCFTOOLS_SORT(concat_out) 
BCFTOOLS_SORT.out.vcf .set{germline_somatic_vcfs} + // germline_somatic_vcfs (final): [meta, vcf] -- sorted combined somatic+germline VCF for somatic phasing - // PHASING + // PHASING: assign variants to haplotypes using Longphase + // - Germline phasing: uses normal BAMs + germline-only VCF (produces the phase blocks) + // - Somatic phasing: uses tumor BAMs + merged somatic+germline VCF (transfers germline phase to somatic sites) if (!params.skip_modcall) { + // With modcall: include base-modification VCF as additional phasing evidence normal_bams_w_tumoronly_ch .join(germline_vcf) .join(LONGPHASE_MODCALL_GERMLINE.out.mod_vcf) .map { meta, bam, bai, vcf, _tbi, mods-> - def svs = [] + def svs = [] // SVs for phasing are not used here return [ meta, bam, bai, vcf, svs, mods ] } .set{ longphase_phase_germline_input_ch } + // longphase_phase_germline_input_ch: [meta, bam, bai, germline_vcf, [], mod_vcf] tumor_bams_ch .join(germline_somatic_vcfs) @@ -140,8 +194,10 @@ workflow PHASING_HAPLOTYPING { return [ meta, bam, bai, vcf, svs, mods ] } .set{ longphase_phase_somatic_input_ch } + // longphase_phase_somatic_input_ch: [meta, bam, bai, somatic+germline_vcf, [], mod_vcf] } else { + // Without modcall: empty lists for SVs and mods normal_bams_w_tumoronly_ch .join(germline_vcf) .map { meta, bam, bai, vcf, _tbi -> @@ -150,6 +206,7 @@ workflow PHASING_HAPLOTYPING { return [ meta, bam, bai, vcf, svs, mods ] } .set{ longphase_phase_germline_input_ch } + // longphase_phase_germline_input_ch: [meta, bam, bai, germline_vcf, [], []] tumor_bams_ch .join(germline_somatic_vcfs) @@ -159,8 +216,16 @@ workflow PHASING_HAPLOTYPING { return [ meta, bam, bai, vcf, svs, mods ] } .set{ longphase_phase_somatic_input_ch } + // longphase_phase_somatic_input_ch: [meta, bam, bai, somatic+germline_vcf, [], []] } + // + // MODULE: LONGPHASE_PHASE_GERMLINE (label: process_medium) + // Input: [meta, bam, bai, vcf, svs, mods] -- normal BAMs + germline VCF (± mod VCF) + // fasta / fai + // Output: 
.snv_vcf -- [meta, vcf] -- phased germline SNV VCF (PS tags added) + // .snv_vcf_index -- [meta, tbi] + // LONGPHASE_PHASE_GERMLINE ( longphase_phase_germline_input_ch, fasta, @@ -170,7 +235,15 @@ workflow PHASING_HAPLOTYPING { LONGPHASE_PHASE_GERMLINE.out.snv_vcf .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf_index) .set{ phased_germline_vcf } - + // phased_germline_vcf: [meta, vcf, tbi] -- Longphase-phased germline VCF + + // + // MODULE: LONGPHASE_PHASE_SOMATIC (label: process_medium) + // Input: [meta, bam, bai, combined_vcf, svs, mods] -- tumor BAMs + somatic+germline VCF (± mod VCF) + // fasta / fai + // Output: .snv_vcf -- [meta, vcf] -- phased somatic (+ germline) VCF + // .snv_vcf_index -- [meta, tbi] + // LONGPHASE_PHASE_SOMATIC ( longphase_phase_somatic_input_ch, fasta, @@ -180,13 +253,14 @@ workflow PHASING_HAPLOTYPING { LONGPHASE_PHASE_SOMATIC.out.snv_vcf .join(LONGPHASE_PHASE_SOMATIC.out.snv_vcf_index) .set{ phased_somatic_vcf } + // phased_somatic_vcf: [meta, vcf, tbi] -- Longphase-phased somatic (+ germline) VCF - // HAPLOTAGING - // remove type for merging - + // HAPLOTAGGING: tag each read in the BAM with its haplotype (HP tag) using the phased germline VCF + // All sample types (tumor, normal, tumor-only) are haplotagged using the germline phase blocks + // 'type' is re-added to meta here so downstream tools can distinguish tumor from normal in the output if(!params.skip_modcall) { - + // Strip 'type' from modcall output meta to allow joining with other channels (which have no 'type') LONGPHASE_MODCALL_GERMLINE.out.mod_vcf .map { meta, mods -> def new_meta = meta.subMap('id', @@ -201,7 +275,9 @@ workflow PHASING_HAPLOTYPING { return [ new_meta, mods ] } .set{modcall_vcf_ch} + // modcall_vcf_ch: [meta (no type), mod_vcf] -- base modification VCF from germline modcall + // Build haplotag input for tumor-only samples (re-add type:"tumor") tumor_only_ch .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .join(modcall_vcf_ch) @@ -211,6 +287,7 @@ workflow 
PHASING_HAPLOTYPING { return [new_meta, bam, bai, vcf, svs, mods] } .set{ tumor_only_ch } + // tumor_only_ch (updated): [meta+type:tumor, bam, bai, phased_germline_vcf, [], mod_vcf] paired_tumor_ch .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) @@ -221,6 +298,7 @@ workflow PHASING_HAPLOTYPING { return [new_meta, bam, bai, vcf, svs, mods] } .set{ paired_tumor_ch } + // paired_tumor_ch (updated): [meta+type:tumor, bam, bai, phased_germline_vcf, [], mod_vcf] paired_normal_ch .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) @@ -231,10 +309,11 @@ workflow PHASING_HAPLOTYPING { return [new_meta, bam, bai, vcf, svs, mods] } .set{ paired_normal_ch } + // paired_normal_ch (updated): [meta+type:normal, bam, bai, phased_germline_vcf, [], mod_vcf] } else { - + // Without modcall: empty lists for mods tumor_only_ch .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) .map { meta, bam, bai, vcf -> @@ -244,6 +323,7 @@ workflow PHASING_HAPLOTYPING { return [new_meta, bam, bai, vcf, svs, mods] } .set{ tumor_only_ch } + // tumor_only_ch (updated): [meta+type:tumor, bam, bai, phased_germline_vcf, [], []] paired_tumor_ch .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) @@ -254,6 +334,7 @@ workflow PHASING_HAPLOTYPING { return [new_meta, bam, bai, vcf, svs, mods] } .set{ paired_tumor_ch } + // paired_tumor_ch (updated): [meta+type:tumor, bam, bai, phased_germline_vcf, [], []] paired_normal_ch .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf) @@ -264,14 +345,24 @@ workflow PHASING_HAPLOTYPING { return [new_meta, bam, bai, vcf, svs, mods] } .set{ paired_normal_ch } + // paired_normal_ch (updated): [meta+type:normal, bam, bai, phased_germline_vcf, [], []] } + // Merge all sample types for haplotagging in a single LONGPHASE_HAPLOTAG call tumor_only_ch .mix(paired_tumor_ch) .mix(paired_normal_ch) .set {longphase_haplotag_input_ch} - + // longphase_haplotag_input_ch: [meta(+type), bam, bai, phased_germline_vcf, [], mod_vcf_or_[]] + // -- all samples (tumor-only, paired tumor, paired normal) + + // + // MODULE: 
LONGPHASE_HAPLOTAG (label: process_medium) + // Input: [meta, bam, bai, phased_vcf, svs, mods] -- BAM + phased germline VCF (± mod VCF) + // fasta / fai + // Output: .bam -- [meta, bam] -- BAM with HP (haplotype) and PS (phase set) tags added to reads + // LONGPHASE_HAPLOTAG ( longphase_haplotag_input_ch, fasta, @@ -280,17 +371,24 @@ workflow PHASING_HAPLOTYPING { LONGPHASE_HAPLOTAG.out.bam .set{ tumor_normal_hapbams_ch } + // tumor_normal_hapbams_ch (pre-index): [meta, bam] -- haplotagged BAM (no index yet) + // + // MODULE: SAMTOOLS_INDEX (label: process_medium) + // Input: [meta, bam] -- haplotagged BAM + // Output: .bai -- [meta, bai] + // SAMTOOLS_INDEX ( tumor_normal_hapbams_ch ) tumor_normal_hapbams_ch .join(SAMTOOLS_INDEX.out.bai) .set{ tumor_normal_hapbams_ch } + // tumor_normal_hapbams_ch (final): [meta, bam, bai] -- haplotagged BAM with index emit: - tumor_normal_hapbams_ch - phased_germline_vcf - phased_somatic_vcf + tumor_normal_hapbams_ch // [meta, bam, bai] -- haplotagged BAMs for all samples + phased_germline_vcf // [meta, vcf, tbi] -- phased germline VCF (used by SEVERUS + VEP) + phased_somatic_vcf // [meta, vcf, tbi] -- phased somatic VCF (used by VEP) } diff --git a/subworkflows/local/prepare_annotation.nf b/subworkflows/local/prepare_annotation.nf index 8771680d..f6b98e78 100644 --- a/subworkflows/local/prepare_annotation.nf +++ b/subworkflows/local/prepare_annotation.nf @@ -3,12 +3,12 @@ include {ENSEMBLVEP_DOWNLOAD } from '../../modules/nf-core/ensemblvep/download/m workflow PREPARE_ANNOTATION { take: - vep_cache - vep_cache_version - vep_genome - vep_args - vep_species - download_vep_cache + vep_cache // path: local VEP cache directory (or S3 annotation-cache URL) + vep_cache_version // int: VEP cache version (e.g. 110) + vep_genome // str: genome assembly string (e.g. "GRCh38") + vep_args // str: extra VEP CLI arguments (parsed to detect --merged / --refseq) + vep_species // str: species name (e.g.
"homo_sapiens") + download_vep_cache // bool: if true, download cache via ENSEMBLVEP_DOWNLOAD instead of using local path main: @@ -16,11 +16,16 @@ workflow PREPARE_ANNOTATION { ensemblvep_cache = channel.empty() // - // MODULE: ENSEMBLVEP_DOWNLOAD + // MODULE: ENSEMBLVEP_DOWNLOAD (label: process_medium) + // Only runs when the download_vep_cache input is true + // Input: vep_download_info -- [[], vep_genome, vep_species, vep_cache_version] + // Output: .cache -- downloaded and extracted VEP cache directory // if (download_vep_cache) { + // Build input tuple: empty-list meta placeholder ([]) + genome/species/version for ENSEMBLVEP_DOWNLOAD vep_download_info = channel.of([[],vep_genome, vep_species, vep_cache_version]) + // vep_download_info: [[], genome_str, species_str, cache_version_int] ENSEMBLVEP_DOWNLOAD ( vep_download_info @@ -31,6 +36,8 @@ workflow PREPARE_ANNOTATION { } else { + // Validate that the local cache directory exists and resolve the correct subdirectory + // The annotation-cache S3 bucket uses a version-prefixed path; local paths do not def vep_annotation_cache_key = (vep_cache == "s3://annotation-cache/vep_cache/") ? "${vep_cache_version}_${vep_genome}/" : "" def vep_species_suffix = vep_args.contains("--merged") ? '_merged' : (vep_args.contains("--refseq") ?
'_refseq' : '') def vep_cache_dir = "${vep_annotation_cache_key}${vep_species}${vep_species_suffix}/${vep_cache_version}_${vep_genome}" @@ -43,11 +50,13 @@ workflow PREPARE_ANNOTATION { } } + // Collect the resolved cache root as a channel value ensemblvep_cache = channel.fromPath(file("${vep_cache}/${vep_annotation_cache_key}"), checkIfExists: true).collect() } + // ensemblvep_cache: path (or list-of-paths) to the VEP cache root directory emit: - vep_cache = ensemblvep_cache + vep_cache = ensemblvep_cache // path -- VEP cache directory (downloaded or validated local) versions = ch_versions } diff --git a/subworkflows/local/prepare_reference_files.nf b/subworkflows/local/prepare_reference_files.nf index 42e2e959..efc867d9 100644 --- a/subworkflows/local/prepare_reference_files.nf +++ b/subworkflows/local/prepare_reference_files.nf @@ -13,13 +13,13 @@ include { WGET } from '../../modules/nf-core/wget/main workflow PREPARE_REFERENCE_FILES { take: - fasta - ascat_alleles - ascat_loci - ascat_loci_gc - ascat_loci_rt - basecall_meta - clair3_modelMap + fasta // str: path to reference FASTA (may be .gz) + ascat_alleles // str: path to ASCAT allele files (directory or .zip), or null + ascat_loci // str: path to ASCAT loci files (directory or .zip), or null + ascat_loci_gc // str: path to ASCAT GC correction file (.zip or direct), or null + ascat_loci_rt // str: path to ASCAT RT correction file (.zip or direct), or null + basecall_meta // [meta, basecall_model_str, kinetics_str] -- from METAEXTRACT per sample + clair3_modelMap // Map: basecall model string -> Clair3 model id (looked up with .get(); result becomes the model meta.id) main: ch_versions = channel.empty() @@ -29,8 +29,13 @@ workflow PREPARE_REFERENCE_FILES { gc_file = channel.empty() rt_file = channel.empty() - // Check if fasta and gtf are zipped + // Decompress FASTA if gzipped; pass through as-is if already uncompressed if (fasta.endsWith('.gz')){ + // + // MODULE: UNZIP_FASTA (PIGZ_UNCOMPRESS alias; label: process_medium) + // Input: [[:], fasta.gz] + // Output: .file --
[[:], fasta] -- decompressed FASTA + // UNZIP_FASTA( [ [:], fasta ]) ch_prepared_fasta = UNZIP_FASTA.out.file @@ -38,11 +43,11 @@ workflow PREPARE_REFERENCE_FILES { } else { ch_prepared_fasta = channel.value([ [:], fasta ]) } - // ch_prepared_fasta: [[:], fasta_path] -- empty meta; uncompressed if input was .gz - - // if clair3 model is specified, then download that - // otherwise use info in bam header and download that + // ch_prepared_fasta: [[:], fasta_path] -- empty meta; uncompressed FASTA + // Build Clair3 model download URLs from basecall metadata + // Priority: explicit meta.clair3_model param > auto-detected from BAM header via modelMap + // PacBio models from HKU mirror; ONT models from Oxford Nanopore CDN basecall_meta.map { meta, basecall_model_meta, _kinetics_meta -> def id_new = basecall_model_meta ? clair3_modelMap.get(basecall_model_meta) : basecall_model_meta def meta_new = [id: id_new] @@ -51,31 +56,37 @@ workflow PREPARE_REFERENCE_FILES { def url = "${download_prefix}/${model}.tar.gz" return [ meta_new, url ] } - .unique() + .unique() // deduplicate: multiple samples with the same basecall model share one download .set{ clair3_model_urls } - // [meta(id=clair3_model_id), download_url] -- one item per unique Clair3 model; deduplicated with .unique() + // clair3_model_urls: [meta(id=clair3_model_name), download_url_str] + // one item per unique Clair3 model needed across all samples // - // MODULE: Download model + // MODULE: WGET (label: process_single) + // Input: [meta, url_str] -- model name (id) + download URL + // Output: .outfile -- [meta, tarball] -- downloaded .tar.gz model archive // - WGET ( clair3_model_urls ) ch_versions = ch_versions.mix(WGET.out.versions) // - // MODULE: Untar model + // MODULE: UNTAR (label: process_single) + // Input: WGET.out.outfile -- [meta, tarball] + // Output: .untar -- [meta, model_dir] -- extracted Clair3 model directory // - UNTAR ( WGET.out.outfile ) UNTAR.out.untar.set { downloaded_clair3_models } - // 
[meta(id=clair3_model_id), model_dir] -- extracted Clair3 model directory + // downloaded_clair3_models: [meta(id=clair3_model_name), model_dir] // - // MODULE: Index the fasta + // MODULE: SAMTOOLS_FAIDX (label: process_single) + // Input: [[:], fasta, []] -- empty meta + empty regions file (index full FASTA) + // false -- do not write fai to stdout + // Output: .fai -- [[:], fai_path] // SAMTOOLS_FAIDX ( ch_prepared_fasta.map { meta, fa -> [meta, fa, []] }, @@ -86,51 +97,66 @@ workflow PREPARE_REFERENCE_FILES { // ch_prepared_fai: [[:], fai_path] -- empty meta // - // Prepare ASCAT files + // Prepare ASCAT reference files + // Each file set can be provided as a .zip archive or a plain directory/file path + // All ASCAT outputs are flat file collections (no meta tuple) for use with ASCAT module // - - // prepare ascat and controlfreec reference files if ( !params.skip_ascat ) { + // Allele files: per-chromosome SNP allele frequency files (used for LogR/BAF calculation) if (!ascat_alleles) allele_files = channel.empty() else if (ascat_alleles.endsWith(".zip")) { + // MODULE: UNZIP_ALLELES (UNZIP alias; label: process_single) + // Input: [meta(id=basename), [zip_file]] -- collected zip + // Output: .unzipped_archive -- [meta, dir] -- extracted directory; flatMap lists individual files UNZIP_ALLELES(channel.fromPath(file(ascat_alleles)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) allele_files = UNZIP_ALLELES.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // allele_files: [path, path, ...] 
-- all per-chromosome allele files collected ch_versions = ch_versions.mix(UNZIP_ALLELES.out.versions) } else allele_files = channel.fromPath(ascat_alleles).collect() + // Loci files: per-chromosome SNP loci positions if (!ascat_loci) loci_files = channel.empty() else if (ascat_loci.endsWith(".zip")) { + // MODULE: UNZIP_LOCI (UNZIP alias; label: process_single) UNZIP_LOCI(channel.fromPath(file(ascat_loci)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) loci_files = UNZIP_LOCI.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // loci_files: [path, path, ...] -- all per-chromosome loci files collected ch_versions = ch_versions.mix(UNZIP_LOCI.out.versions) } else loci_files = channel.fromPath(ascat_loci).collect() + // GC correction file: genome-wide GC content per locus (optional) if (!ascat_loci_gc) gc_file = channel.value([]) else if ( ascat_loci_gc.endsWith(".zip") ) { + // MODULE: UNZIP_GC (UNZIP alias; label: process_single) UNZIP_GC(channel.fromPath(file(ascat_loci_gc)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) gc_file = UNZIP_GC.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // gc_file: [path, ...] -- GC correction file(s) collected ch_versions = ch_versions.mix(UNZIP_GC.out.versions) } else gc_file = channel.fromPath(ascat_loci_gc).collect() + // Replication timing correction file: RT correction per locus (optional) if (!ascat_loci_rt) rt_file = channel.value([]) else if (ascat_loci_rt.endsWith(".zip")) { + // MODULE: UNZIP_RT (UNZIP alias; label: process_single) UNZIP_RT(channel.fromPath(file(ascat_loci_rt)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) rt_file = UNZIP_RT.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // rt_file: [path, ...] 
-- RT correction file(s) collected ch_versions = ch_versions.mix(UNZIP_RT.out.versions) } else rt_file = channel.fromPath(ascat_loci_rt).collect() } emit: - prepped_fasta = ch_prepared_fasta // [[:], fasta_path] - prepped_fai = ch_prepared_fai // [[:], fai_path] + prepped_fasta = ch_prepared_fasta // [[:], fasta_path] -- uncompressed reference FASTA + prepped_fai = ch_prepared_fai // [[:], fai_path] -- samtools FAI index // ASCAT reference files -- flat file collections (no meta tuple wrapper) - allele_files - loci_files - gc_file - rt_file + // Each is a list of paths collected into a single channel value + allele_files // [path, ...] -- per-chromosome allele frequency files + loci_files // [path, ...] -- per-chromosome loci position files + gc_file // [path, ...] -- GC correction file ([] if not provided) + rt_file // [path, ...] -- replication timing correction file ([] if not provided) - downloaded_clair3_models // [meta(id=clair3_model_id), model_dir] + downloaded_clair3_models // [meta(id=clair3_model_name), model_dir] versions = ch_versions } diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf index cf3b6e86..9befdb2c 100644 --- a/subworkflows/local/small_variant_consensus.nf +++ b/subworkflows/local/small_variant_consensus.nf @@ -10,50 +10,78 @@ include { BCFTOOLS_SORT } from '../../modul workflow SMALL_VARIANT_CONSENSUS { take: - mixed_vcfs // [meta: w caller_info,mixed_vcfs, mixed_indicies] - fasta - fai - prioritize_caller - combine_method + mixed_vcfs // [meta(+caller field), vcf, tbi] -- one item per caller per sample + // meta.caller is one of: 'clair3', 'clairs-to', 'clairs', 'deepvariant', 'deepsomatic' + fasta // [[:], fasta] + fai // [[:], fai] + prioritize_caller // str: which caller's calls take priority ('deepvariant'/'deepsomatic' or 'clair') + combine_method // str: 'consensus' (intersection only) or 'all' (intersection + private calls from priority caller) main: - //normalize VCFs + + // 
+ // MODULE: BCFTOOLS_NORM (label: process_medium) + // Input: [meta, vcf, tbi] -- per-caller VCF + // Output: .vcf -- [meta, vcf] -- left-aligned, normalised VCF + // .tbi -- [meta, tbi] + // BCFTOOLS_NORM(mixed_vcfs, fasta) BCFTOOLS_NORM.out.vcf .join(BCFTOOLS_NORM.out.tbi) .set {normalized_vcfs} - - // create annotation file with caller name + // normalized_vcfs: [meta(+caller), vcf, tbi] -- normalised per-caller VCF + + // + // MODULE: BCFTOOLS_QUERY (label: process_single) + // Extract variant positions to build a caller-annotation file used by BCFTOOLS_ANNOTATE + // Input: [meta, vcf, tbi] -- normalised VCF + // Output: .output -- [meta, tsv] -- tab-separated annotation file (CHROM POS CALLER) + // .index -- [meta, tbi] + // BCFTOOLS_QUERY(normalized_vcfs, [], [], []) + // Prepare BCFTOOLS_ANNOTATE input: VCF + caller-name annotation file normalized_vcfs .join(BCFTOOLS_QUERY.out.output) .join(BCFTOOLS_QUERY.out.index) .map{ meta, vcf, tbi, annotations, annotations_index -> - def columns = [] - def header_lines = [] - def rename_chrs = [] + def columns = [] // no extra column specs + def header_lines = [] // no extra header lines + def rename_chrs = [] // no chromosome renaming return [ meta, vcf, tbi, annotations, annotations_index, columns, header_lines, rename_chrs ] } .set{annotate_input} - - // Annotate vcfs with caller id + // annotate_input: [meta, vcf, tbi, annotations_tsv, annotations_tbi, [], [], []] + + // + // MODULE: BCFTOOLS_ANNOTATE (label: process_medium) + // Adds CALLER INFO field to each VCF record using the query-generated annotation file + // Input: [meta, vcf, tbi, annotations_tsv, annotations_tbi, [], [], []] + // Output: .vcf -- [meta, vcf] -- VCF with CALLER annotation added + // .tbi -- [meta, tbi] + // BCFTOOLS_ANNOTATE(annotate_input) BCFTOOLS_ANNOTATE.out.vcf .join(BCFTOOLS_ANNOTATE.out.tbi) .set{annotated_vcfs} + // annotated_vcfs: [meta(+caller), vcf, tbi] -- VCF with CALLER INFO tag + + // Branch annotated VCFs by caller family 
for the intersection step annotated_vcfs .branch { meta, _vcfs, _tbi -> deepvariant: meta.caller in [ 'deepvariant', 'deepsomatic' ] clair: meta.caller in ['clair3','clairs-to','clairs'] } .set{annotated_vcfs_branched} + // annotated_vcfs_branched.deepvariant: [meta(caller=deepvariant/deepsomatic), vcf, tbi] + // annotated_vcfs_branched.clair: [meta(caller=clair3/clairs-to/clairs), vcf, tbi] clair_ch = annotated_vcfs_branched.clair deepvariant_ch = annotated_vcfs_branched.deepvariant + // Strip 'caller' field from meta before joining so both channels share the same key clair_ch. map {meta, vcfs, tbi -> def new_meta = meta.subMap('id', @@ -69,6 +97,7 @@ workflow SMALL_VARIANT_CONSENSUS { return [ new_meta, vcfs, tbi] } .set{clair_ch} + // clair_ch: [meta (no caller), vcf, tbi] deepvariant_ch .map {meta, vcfs, tbi -> @@ -85,7 +114,9 @@ workflow SMALL_VARIANT_CONSENSUS { return [ new_meta, vcfs, tbi] } .set{deepvariant_ch} + // deepvariant_ch: [meta (no caller), vcf, tbi] + // Join DeepVariant and Clair VCFs per sample into a single tuple for BCFTOOLS_ISEC deepvariant_ch .join(clair_ch) .map { meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> @@ -94,19 +125,35 @@ workflow SMALL_VARIANT_CONSENSUS { return [ meta, vcfs, tbis] } .set{mixed_vcfs} + // mixed_vcfs (re-paired): [meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]] + // Add empty optional fields required by BCFTOOLS_ISEC mixed_vcfs .map{ meta, vcfs, tbis -> - def file = [] - def target = [] - def regions = [] + def file = [] // no regions file + def target = [] // no target sites + def regions = [] // no region string return [meta, vcfs, tbis, file, target, regions] } .set{isec_input} - + // isec_input: [meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi], [], [], []] + + // + // MODULE: BCFTOOLS_ISEC (label: process_medium) + // Computes the intersection and private sets for the two callers + // Input: [meta, [vcf1, vcf2], [tbi1, tbi2], [], [], []] + // Output (custom nf-core module outputs): 
+ // .deepvar_consensus_vcf -- [meta, vcf] -- variants called by both callers (DeepVariant record) + // .clair_consensus_vcf -- [meta, vcf] -- variants called by both callers (Clair record) + // .deepvar_private_vcf -- [meta, vcf] -- variants unique to DeepVariant + // .clair_private_vcf -- [meta, vcf] -- variants unique to Clair + // (+ corresponding .tbi outputs for each) + // BCFTOOLS_ISEC(isec_input) if (combine_method == 'consensus') { + // Take only the intersection: variants called by BOTH callers + // Use the record from the prioritized caller if (prioritize_caller in ['deepvariant', 'deepsomatic']) { BCFTOOLS_ISEC.out.deepvar_consensus_vcf .set{vcf} @@ -119,10 +166,14 @@ workflow SMALL_VARIANT_CONSENSUS { BCFTOOLS_ISEC.out.clair_consensus_tbi .set{tbi} } + // vcf/tbi: [meta, vcf/tbi] -- consensus-only calls from the priority caller } else if (combine_method == 'all') { + // Take the intersection (record from the prioritized caller) PLUS the private calls of the OTHER caller + // NOTE(review): the prioritized caller's own private calls are not concatenated below -- confirm this is intended if (prioritize_caller in ['deepvariant', 'deepsomatic']) { + // consensus (DeepVariant record) + Clair-private variants BCFTOOLS_ISEC.out.deepvar_consensus_vcf .join(BCFTOOLS_ISEC.out.deepvar_consensus_tbi) .join(BCFTOOLS_ISEC.out.clair_private_vcf) @@ -131,11 +182,13 @@ workflow SMALL_VARIANT_CONSENSUS { return[meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]] } .set{concat_input} + // concat_input: [meta, [consensus_vcf, private_vcf], [consensus_tbi, private_tbi]] BCFTOOLS_CONCAT(concat_input) BCFTOOLS_CONCAT.out.vcf .set{concat_out} } else if (prioritize_caller == 'clair') { + // consensus (Clair record) + DeepVariant-private variants BCFTOOLS_ISEC.out.deepvar_private_vcf .join(BCFTOOLS_ISEC.out.deepvar_private_tbi) .join(BCFTOOLS_ISEC.out.clair_consensus_vcf) @@ -144,19 +197,22 @@ workflow SMALL_VARIANT_CONSENSUS { return[meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]] } .set{concat_input} + // concat_input: [meta, [private_vcf,
consensus_vcf], [private_tbi, consensus_tbi]] BCFTOOLS_CONCAT(concat_input) BCFTOOLS_CONCAT.out.vcf .set{concat_out} } + // concat_out: [meta, vcf] -- unsorted concatenated VCF (priority-caller consensus + other caller's private calls) BCFTOOLS_SORT(concat_out) BCFTOOLS_SORT.out.vcf .set{vcf} BCFTOOLS_SORT.out.tbi .set{tbi} + // vcf/tbi: [meta, vcf/tbi] -- sorted combined VCF } emit: - vcf - tbi + vcf // [meta, vcf] -- final consensus/combined VCF + tbi // [meta, tbi] } diff --git a/subworkflows/local/tumor_only/tumoronly_smallvar.nf b/subworkflows/local/tumor_only/tumoronly_smallvar.nf index 4603663a..e9b50e2f 100644 --- a/subworkflows/local/tumor_only/tumoronly_smallvar.nf +++ b/subworkflows/local/tumor_only/tumoronly_smallvar.nf @@ -12,43 +12,64 @@ include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../../subwork workflow TUMORONLY_SMALLVAR { take: - tumor_bams // [ meta, tumor_bams, tumor_bai ] - fasta - fai - pon_channel + tumor_bams // [meta, tumor_bam, tumor_bai] -- tumor-only aligned BAMs (no matched normal) + fasta // [[:], fasta] + fai // [[:], fai] + pon_channel // [ [pon_vcf_path, ...], [is_population_allele_flag, ...] 
] + // used by ClairS-TO to filter germline variants with population allele databases main: - // empty channel emission - ch_versions = channel.empty() somatic_vcf = channel.empty() germline_vcf = channel.empty() somatic_tbi = channel.empty() germline_tbi = channel.empty() - // CLAIRS-TO (SOMATIC/NONGERMLINE VARIANT CALLING) + // CLAIRS-TO: somatic AND germline variant calling from tumor-only BAM + // ClairS-TO uses a panel-of-normals / population allele database to separate somatic from germline + // Runs if either somatic or germline clair calling is requested (produces both jointly) if(params.somatic_var_keep.contains('clair') || params.germline_var_keep.contains('clair')) { + // Append model name and PoN info to build the full CLAIRSTO input tumor_bams .map { meta, bam, bai -> return [ meta, bam, bai, meta.clairSTO_model] } .combine(pon_channel) .set{ clairsto_input_ch} + // clairsto_input_ch: [meta, bam, bai, clairSTO_model_str, [pon_vcf_paths], [pon_flags]] + + // + // MODULE: CLAIRSTO (label: process_high) + // Input: [meta, bam, bai, model_str, [pon_vcfs], [pon_flags]] + // fasta / fai + // Output: .snv_vcf -- [meta, vcf] -- SNV calls (germline + somatic, unsplit) + // .indel_vcf -- [meta, vcf] -- indel calls (germline + somatic, unsplit) + // CLAIRSTO ( clairsto_input_ch, fasta, fai ) - // SPLIT CLAIRSTO GERMLINE AND SOMATIC VARIATION + // ClairS-TO outputs a combined VCF with FILTER tags indicating somatic/germline status; + // VCFSPLIT separates these into two VCFs CLAIRSTO.out.indel_vcf .join(CLAIRSTO.out.snv_vcf) .set{ clairsto_combined_vcf } + // clairsto_combined_vcf: [meta, indel_vcf, snv_vcf] + // + // MODULE: VCFSPLIT (label: process_single) + // Input: [meta, indel_vcf, snv_vcf] -- combined ClairS-TO output + // Output: .germline_vcf -- [meta, vcf] -- germline variants only + // .germline_tbi -- [meta, tbi] + // .somatic_vcf -- [meta, vcf] -- somatic variants only + // .somatic_tbi -- [meta, tbi] + // VCFSPLIT ( clairsto_combined_vcf ) @@ -60,6 
+81,7 @@ workflow TUMORONLY_SMALLVAR { return [ new_meta, vcf, tbi] } .set{clairsto_germline_ch} + // clairsto_germline_ch: [meta(+caller:'clairs-to'), vcf, tbi] -- germline variants VCFSPLIT.out.somatic_vcf .join(VCFSPLIT.out.somatic_tbi) @@ -68,25 +90,35 @@ workflow TUMORONLY_SMALLVAR { return [ new_meta, vcf, tbi] } .set{clairsto_somatic_ch} + // clairsto_somatic_ch: [meta(+caller:'clairs-to'), vcf, tbi] -- somatic variants } - // DEEPVARIANT + + // DEEPVARIANT: germline-only variant calling (no somatic mode for tumor-only) if(params.germline_var_keep.contains('deepvariant')) { + + // + // SUBWORKFLOW: DEEPVARIANT (nf-core) + // Input: [meta, bam, bai, []] -- [] = genome-wide (no interval list) + // fasta / fai / [[:],[]] x2 -- empty PAR/GFF + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // tumor_bams .map { meta, bam, bai -> def intervals = [] return [meta,bam,bai, intervals] } .set{deepvariant_input_ch} + // deepvariant_input_ch: [meta, bam, bai, []] DEEPVARIANT ( deepvariant_input_ch, fasta, fai, - [[:],[]], - [[:],[]] + [[:],[]], // PAR regions (not used) + [[:],[]] // GFF annotation (not used) ) - DEEPVARIANT.out.vcf .join(DEEPVARIANT.out.vcf_index) .map{ meta, vcf, tbi -> @@ -94,14 +126,18 @@ workflow TUMORONLY_SMALLVAR { return [new_meta, vcf, tbi] } .set{deepvariant_ch} + // deepvariant_ch: [meta(+caller:'deepvariant'), vcf, tbi] } // COMBINE GERMLINE VARIANTS + // If both callers requested: run consensus; otherwise pass through single-caller output if (params.germline_var_keep.size() > 1) { clairsto_germline_ch .mix(deepvariant_ch) .set{combined_germline_ch} + // combined_germline_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + // SUBWORKFLOW: GERMLINE_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) GERMLINE_CONSENSUS( combined_germline_ch, fasta, @@ -112,6 +148,7 @@ workflow TUMORONLY_SMALLVAR { GERMLINE_CONSENSUS.out.vcf .join(GERMLINE_CONSENSUS.out.tbi) .set{germline_vcf} + // germline_vcf: [meta(+caller from 
consensus), vcf, tbi] } else if (params.germline_var_keep == ['clair']) { clairsto_germline_ch @@ -121,7 +158,9 @@ workflow TUMORONLY_SMALLVAR { deepvariant_ch .set{germline_vcf} } - // DEEPSOMATIC + + // DEEPSOMATIC: somatic variant calling in tumor-only mode (no matched normal) + // Normal BAM/BAI are passed as empty lists; DeepSomatic uses the model's internal normal baseline if(params.somatic_var_keep.contains('deepsomatic')) { tumor_bams .map { meta, tumor_bam, tumor_bai -> @@ -130,13 +169,22 @@ workflow TUMORONLY_SMALLVAR { return [meta,normal_bam,normal_bai,tumor_bam,tumor_bai] } .set{deepsomatic_input_ch} + // deepsomatic_input_ch: [meta, [], [], tumor_bam, tumor_bai] + // empty normal_bam/bai signals tumor-only mode to DEEPSOMATIC subworkflow + // + // SUBWORKFLOW: DEEPSOMATIC (local) + // Input: [meta, [], [], tumor_bam, tumor_bai] -- tumor-only (no normal) + // [[:],[]] / fasta / fai / [[:],[]] + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // DEEPSOMATIC ( deepsomatic_input_ch, - [[:],[]], + [[:],[]], // intervals (empty = genome-wide) fasta, fai, - [[:],[]] + [[:],[]] // GZI (empty if FASTA is uncompressed) ) DEEPSOMATIC.out.vcf .join(DEEPSOMATIC.out.vcf_index) @@ -145,13 +193,17 @@ workflow TUMORONLY_SMALLVAR { return [new_meta, vcf, tbi] } .set{deepsomatic_ch} + // deepsomatic_ch: [meta(+caller:'deepsomatic'), vcf, tbi] } + // COMBINE SOMATIC VARIATION if (params.somatic_var_keep.size() > 1) { clairsto_somatic_ch .mix(deepsomatic_ch) .set{combined_somatic_ch} + // combined_somatic_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + // SUBWORKFLOW: SOMATIC_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) SOMATIC_CONSENSUS( combined_somatic_ch, fasta, @@ -162,6 +214,7 @@ workflow TUMORONLY_SMALLVAR { SOMATIC_CONSENSUS.out.vcf .join(SOMATIC_CONSENSUS.out.tbi) .set{somatic_vcf} + // somatic_vcf: [meta(+caller from consensus), vcf, tbi] } else if (params.somatic_var_keep == ['clair']) { clairsto_somatic_ch @@ -172,6 +225,7 @@ 
workflow TUMORONLY_SMALLVAR { .set{somatic_vcf} } + // Strip 'caller' from meta before emitting both VCFs somatic_vcf .map{ meta, vcf, tbi -> def new_meta = meta.subMap('id', @@ -201,9 +255,10 @@ workflow TUMORONLY_SMALLVAR { return[new_meta, vcf, tbi] } .set{germline_vcf} + emit: - somatic_vcf - germline_vcf + somatic_vcf // [meta, vcf, tbi] -- final somatic VCF (ClairS-TO, DeepSomatic, or consensus) + germline_vcf // [meta, vcf, tbi] -- final germline VCF (ClairS-TO germline, DeepVariant, or consensus) } diff --git a/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf b/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf index 94d8789b..34db6e1f 100644 --- a/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf @@ -84,17 +84,23 @@ workflow PIPELINE_INITIALISATION { // Create channel from input file provided through params.input // + // Parse the input samplesheet CSV and build a per-sample BAM channel + // Each samplesheet row describes one tumor (+ optional normal) sample + // Columns: sample_id, bam_tumor, bam_normal, method, sex, fiber, + // clair3_model, clairSTO_model, clairS_model, tumor_replicate, normal_replicate channel .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + // Step 1: build a combined meta map from the samplesheet columns + // paired_data = true if a normal BAM is present; false for tumor-only .map { meta, bam_tumor, bam_normal, method, sex, fiber, clair3_model, clairSTO_model, clairS_model, tumor_replicate, normal_replicate -> def real_clair3_model = (clair3_model == null ) ? null : clair3_model def real_clairS_model = (clairS_model == null ) ? null : clairS_model def real_clairSTO_model = (clairSTO_model == null ) ? null : clairSTO_model def paired_data = bam_normal ? 
true : false def meta_info = meta + [ paired_data: paired_data, - platform: method, - sex: sex, - fiber: fiber, + platform: method, // 'ont' or 'pb' + sex: sex, // 'XX', 'XY', or null (for ASCAT) + fiber: fiber, // 'y' or 'n' (fiber-seq data flag) clair3_model: real_clair3_model, clairS_model: real_clairS_model, clairSTO_model: real_clairSTO_model, @@ -102,9 +108,13 @@ workflow PIPELINE_INITIALISATION { normal_replicate: normal_replicate] return [ meta_info, [ bam_tumor ], [ bam_normal ?: [] ] ] } + // Flatten BAM lists (handles multi-run entries where bam_tumor/bam_normal are lists) .map { meta, bam_tumor, bam_normal -> [ meta, bam_tumor.flatten(), bam_normal.flatten() ] } + // Step 2: split each row into separate tumor and normal items + // flatMap emits 1 item (tumor-only) or 2 items (tumor + normal) per samplesheet row + // Each item gets type='tumor' or type='normal' and the appropriate replicate ID .flatMap { meta, tumor_bam, normal_bam -> def meta_tumor = meta.clone() meta_tumor.type = 'tumor' @@ -120,6 +130,7 @@ workflow PIPELINE_INITIALISATION { 'clairSTO_model', 'replicate') def result = [[meta_tumor, tumor_bam]] + // result so far: [[meta_tumor, [tumor_bam_path...]]] if (normal_bam) { def meta_normal = meta.clone() @@ -136,17 +147,24 @@ workflow PIPELINE_INITIALISATION { 'clairSTO_model', 'replicate') result << [meta_normal, normal_bam] + // result now: [[meta_tumor, [tumor_bams]], [meta_normal, [normal_bams]]] } return result } .set { ch_samplesheet } - - // ch_samplesheet -> meta: [id, paired_data, platform, sex, type] - // bam: unaligned bams + // ch_samplesheet: [meta, [bam...]] + // meta fields: id, paired_data, type ('tumor'|'normal'), platform ('ont'|'pb'), + // sex, fiber ('y'|'n'), clair3_model, clairS_model, clairSTO_model, replicate + // paired_data: true for both items in a T/N pair (same value for tumor AND normal rows) + // bam: list of paths (multiple runs for same sample remain as a list until SAMTOOLS_CAT) + // + // NOTE: tumor-only rows 
emit ONE item (type='tumor', paired_data=false) + // paired rows emit TWO items — tumor (paired_data=true) + normal (paired_data=true) + // Both share the same 'id' to allow downstream joins emit: - samplesheet = ch_samplesheet + samplesheet = ch_samplesheet // [meta, [bam...]] -- see channel structure above versions = ch_versions } diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 1eba0cf8..fcdc165a 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -58,6 +58,15 @@ "perl-math-cdf": 0.1, "tabix": 1.21 }, + "LONGPHASE_HAPLOTAG": { + "longphase": "2.0.1" + }, + "LONGPHASE_PHASE_GERMLINE": { + "longphase": "2.0.1" + }, + "LONGPHASE_PHASE_SOMATIC": { + "longphase": "2.0.1" + }, "METAEXTRACT": { "samtools": 1.21 }, @@ -537,56 +546,56 @@ "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz_summary.html" ], [ - "sample1_normal.bam:md5,186e4e8400cce1f02190fa91ad449271", - "sample1_normal.bam.bai:md5,cabfbe44aa1f0fb6cf5b4d54e6c4d811", - "sample1_tumor.bam:md5,2887783d87d9e4dedbbca367d5e4efdb", - "sample1_tumor.bam.bai:md5,9eb6cf08de5a60644fa54c8810e3dc58", + "sample1_normal.bam:md5,3ce847c38eb619781e32a10c28e0c35c", + "sample1_normal.bam.bai:md5,8dd8c7fa037badc7097067d5a88672cd", + "sample1_tumor.bam:md5,ed5eb35b63d5e92fa8e461b9a1732b21", + "sample1_tumor.bam.bai:md5,21018d3f1f85be74fd7dc66873219b05", "sample1.flagstat:md5,1c41ea9923945501eb7e41f83a90502d", "sample1.idxstats:md5,902e503387799123ea59255e3fca172c", "sample1.stats:md5,70fabbdc07dec0479b3fc7dcec344054", "sample1.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample1.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", "sample1.stats:md5,5012c82d3d3ca60ffdd2fb970f772566", - "sample1_whatshap_stats.gtf:md5,32e249c78790982098e4b0a606171d69", - "sample1_whatshap_stats.log:md5,407b5fcdfbeec1830b2ed6f65f1c2c18", - "sample1_whatshap_stats.tsv:md5,41290e994b5e6dab5ed696925cbb0716", + "sample1_whatshap_stats.gtf:md5,428ca0e0f48dc2e3e1b978fa7cf720f3", + 
"sample1_whatshap_stats.log:md5,5c1f0f79a60a6879b75271fa94b620e8", + "sample1_whatshap_stats.tsv:md5,98582c7e0ff74a2a1978bf70ac9926ee", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "breakpoints_double.csv:md5,d63f058075bfe791248954ca2ee6c4fb", - "read_qual.txt:md5,78247dfa2ea336eac0e128eba5e9eef4", + "breakpoints_double.csv:md5,27b409c73dd0d8bde316545f86ac7f15", + "read_qual.txt:md5,fbe6cd0b65cbfc1ca699e252e531ab72", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "sample2_normal.bam:md5,aff37a8ad733e11fd20978392810e8d8", - "sample2_normal.bam.bai:md5,64744e26f51927c77fd48c282f6ec07d", - "sample2_tumor.bam:md5,9e14b05c07bde2a4653072cba2161a67", - "sample2_tumor.bam.bai:md5,04f64a62a741c7b725877e5d37ceff2f", + "sample2_normal.bam:md5,2ecddb6279310240f2dc29b0cf7f8c84", + "sample2_normal.bam.bai:md5,9caebccee8882bcf95e65631c0ac6730", + "sample2_tumor.bam:md5,c8db7cc4b189dec3fadd2cce07d9fcae", + "sample2_tumor.bam.bai:md5,28827d78aa318e26f71b68581ed5c607", "sample2.flagstat:md5,714d0cc0c213e2640e54a16f3d0e6e7e", "sample2.idxstats:md5,72eb83bb11748dc863fef1a0a5497e4b", "sample2.stats:md5,87cb6e9adf8a133244e8b331be43bb14", "sample2.flagstat:md5,4344a8745efef9cc2a017024218d61c6", "sample2.idxstats:md5,69467fc02c83a30084736aeea8b785fb", "sample2.stats:md5,1e044857eeefb284fda88ee58ff7a04a", - "sample2_whatshap_stats.gtf:md5,af33281699a1d0da83fbe7eaff198d03", - "sample2_whatshap_stats.log:md5,8f5f400786f32871c16e523d9e236fc4", - "sample2_whatshap_stats.tsv:md5,e8b67840491b7d092ac3d5d91db0ff46", + "sample2_whatshap_stats.gtf:md5,a13f0ac1edd7abde4ad013bf2619fe0f", + "sample2_whatshap_stats.log:md5,1d5ed1faca328d3014e9b14a44d18a23", + "sample2_whatshap_stats.tsv:md5,a275209ef9e7885ee5ea3a4aa1c970fd", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", 
"breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "breakpoints_double.csv:md5,a68d7fc9c7a7cb2f31e73189c5412f7b", - "read_qual.txt:md5,8b92ff7dc4536188be159b95525511cd", + "breakpoints_double.csv:md5,7d2fe02046bd2ff7138b46d8f67fc755", + "read_qual.txt:md5,fe3f87458d7c0c6591c37e1fd70cecf2", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "sample3_tumor.bam:md5,13432ff8635f1d142f5f260676930754", - "sample3_tumor.bam.bai:md5,b5ace84a3a8619a93227af01e211b1b9", + "sample3_tumor.bam:md5,f5dfadf92345cd529de4c1919f82b632", + "sample3_tumor.bam.bai:md5,f8ca34c0584329c62cb70ce8fb942cb6", "sample3.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample3.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", "sample3.stats:md5,d7a8552a8a41a217954a0c825d468a60", - "sample3_whatshap_stats.gtf:md5,415b20e0cc30409d24501d64b185dc49", - "sample3_whatshap_stats.log:md5,99a842c8f8f3259ec66b68e8fe0345e6", - "sample3_whatshap_stats.tsv:md5,a65d179e31756ae4127f0bf74da7e701", + "sample3_whatshap_stats.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample3_whatshap_stats.log:md5,ca067293878d1760638626a8c5a31432", + "sample3_whatshap_stats.tsv:md5,62beceb9731cafc620ce5c6eb07a9cc9", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", - "breakpoints_double.csv:md5,46e03bf2d67aa736b599f00fe7f01e06", - "read_qual.txt:md5,b918430d35354dad1d7f02f21e4cd4ed", + "breakpoints_double.csv:md5,a9a0e0a75975904952788c2a0bd3fa85", + "read_qual.txt:md5,25efaa43bb81a4592bfb8f5f08f84b34", "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50" ] @@ -595,6 +604,6 @@ "nf-test": "0.9.3", "nextflow": "25.10.2" }, - "timestamp": "2026-03-26T16:13:56.877873308" + "timestamp": "2026-03-27T17:04:12.049740619" } } \ No newline at end of file diff --git 
a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index 5a4e1a5e..10da2149 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -136,6 +136,9 @@ workflow LRSOMATIC { Channel .of( tuple(pon_files, pon_flags) ) .set { pon_channel } + // pon_channel: [ [pon_vcf_path, ...], [is_population_allele_flag, ...] ] + // -- single tuple of parallel lists; each flag indicates whether the corresponding VCF + // is a population allele database (True) vs. a panel-of-normals artefact file (False) ch_versions = channel.empty() ch_multiqc_files = channel.empty() @@ -145,11 +148,14 @@ workflow LRSOMATIC { // // extracts the base calling model from the bam files + // MODULE: METAEXTRACT (label: process_single) + // Input: [meta, [bam...]] METAEXTRACT( ch_samplesheet ) basecall_meta = METAEXTRACT.out.meta_ext - // [meta, basecall_model_str, kinetics_str] -- basecall model and kinetics extracted from BAM header - // Adds the base calling model to meta.basecall_model + // basecall_meta: [meta, basecall_model_str, kinetics_str] + // basecall_model_str -- e.g. 
"dna_r10.4.1_e8.2_400bps_sup@v5.0.0" or "hifi_revio" + // kinetics_str -- "true" if PacBio kinetics tags present, else "false" ch_samplesheet .join(basecall_meta) @@ -175,10 +181,20 @@ workflow LRSOMATIC { [ meta, bam.flatten()] } .set{ch_samplesheet} - // [meta_full, [bam...]] -- meta now includes: id, paired_data, type, platform, sex, fiber, clair3_model, clairS_model, clairSTO_model, kinetics + // ch_samplesheet (updated): [meta, [bam...]] + // meta fields: id, paired_data, type, platform, sex, fiber, replicate, + // clair3_model, clairS_model, clairSTO_model, kinetics + // bams are grouped per sample (multiple runs merged into a list) // // SUBWORKFLOW: PREPARE_REFERENCE_FILES + // Decompresses the reference FASTA if needed, indexes it, downloads Clair3 models, + // and decompresses ASCAT reference files + // Input: params.fasta, ASCAT file paths, basecall_meta, clair3_modelMap + // Output: .prepped_fasta -- [[:], fasta] + // .prepped_fai -- [[:], fai] + // .downloaded_clair3_models-- [meta(id=model_name), model_dir] + // .allele_files / .loci_files / .gc_file / .rt_file -- flat file collections // PREPARE_REFERENCE_FILES ( @@ -192,13 +208,16 @@ workflow LRSOMATIC { ) downloaded_clair3_models = PREPARE_REFERENCE_FILES.out.downloaded_clair3_models + // downloaded_clair3_models: [meta(id=clair3_model_name), model_dir] ch_nanoplot_pre_txt = channel.empty() if (!params.skip_qc && !params.skip_cramino) { // - // Module: CRAMINO + // MODULE: CRAMINO_PRE (label: process_medium) + // Input: [meta, [bam...]] -- pre-alignment unaligned BAMs + // Output: cramino_pre.out.arrow -- [meta, arrow_file] (feather format stats) // CRAMINO_PRE( ch_samplesheet ) @@ -206,7 +225,9 @@ workflow LRSOMATIC { if (!params.skip_nanoplot) { // - // Module: Nanoplot + // MODULE: NANOPLOT_PRE (label: process_medium) + // Input: CRAMINO_PRE.out.arrow -- [meta, arrow_file] + // Output: nanoplot HTML/txt reports // NANOPLOT_PRE(CRAMINO_PRE.out.arrow) @@ -215,6 +236,7 @@ workflow LRSOMATIC { } + 
// Drop 'replicate' from meta before concatenation -- replicate info not needed downstream ch_samplesheet .map{ meta, bam -> def new_meta = meta.subMap('id', @@ -230,42 +252,40 @@ workflow LRSOMATIC { return[new_meta, bam] } .set{ch_samplesheet_no_rep} + // ch_samplesheet_no_rep: [meta, [bam...]] + // meta fields: id, paired_data, type, platform, sex, fiber, + // clair3_model, clairS_model, clairSTO_model, kinetics + // (replicate field removed; bams still a list — concatenated next) - - // ch_samplesheet -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bam: list of unaligned bams - + // Branch on number of input BAMs: samples with a single BAM skip concatenation + ch_split = ch_samplesheet_no_rep .branch { _meta, bam -> single: bam.size() == 1 multiple: bam.size() > 1 } + // ch_split.single: [meta, [bam]] -- pass-through, no concatenation needed + // ch_split.multiple: [meta, [bam...]] -- need SAMTOOLS_CAT to merge // - // MODULE: SAMTOOLS_CAT + // MODULE: SAMTOOLS_CAT (label: process_single) + // Input: [meta, [bam...]] -- multiple unaligned BAMs for same sample + // Output: .bam -- [meta, bam] -- single merged unaligned BAM // - // concatenates bam files from single sample SAMTOOLS_CAT ( ch_split.multiple ) .bam .mix ( ch_split.single ) .set { ch_cat_ubams } - // [meta, bam] -- single merged unaligned BAM per sample + // ch_cat_ubams: [meta, bam] -- single (possibly concatenated) unaligned BAM per sample vep_cache = channel.empty() if (!params.skip_vep) { - channel - .of([ - vep_cache: params.vep_cache, - vep_cache_version: params.vep_cache_version, - vep_genome: params.vep_genome, - vep_args: params.vep_args, - vep_species: params.vep_species, - download_vep_cache: params.download_vep_cache - ]) - + // SUBWORKFLOW: PREPARE_ANNOTATION + // Validates or downloads the VEP cache directory + // Output: .vep_cache -- path to VEP cache root directory PREPARE_ANNOTATION ( params.vep_cache, params.vep_cache_version, @@ -275,19 +295,21 @@ 
workflow LRSOMATIC { params.download_vep_cache ) ch_versions = ch_versions.mix(PREPARE_ANNOTATION.out.versions) + // Wrap VEP cache path in a tuple with empty meta for use in ENSEMBLVEP_VEP vep_cache = PREPARE_ANNOTATION.out.vep_cache.map {cache -> [[:], cache] } + // vep_cache: [[:], cache_dir_path] -- empty meta + VEP cache directory } ch_versions = ch_versions.mix(PREPARE_REFERENCE_FILES.out.versions) - ch_fasta = PREPARE_REFERENCE_FILES.out.prepped_fasta - ch_fai = PREPARE_REFERENCE_FILES.out.prepped_fai + ch_fasta = PREPARE_REFERENCE_FILES.out.prepped_fasta // [[:], fasta] + ch_fai = PREPARE_REFERENCE_FILES.out.prepped_fai // [[:], fai] - // ASCAT files - allele_files = PREPARE_REFERENCE_FILES.out.allele_files - loci_files = PREPARE_REFERENCE_FILES.out.loci_files - gc_file = PREPARE_REFERENCE_FILES.out.gc_file - rt_file = PREPARE_REFERENCE_FILES.out.rt_file + // ASCAT reference files -- flat path collections (no meta wrapper), passed directly to ASCAT module + allele_files = PREPARE_REFERENCE_FILES.out.allele_files // [path, ...] -- per-chromosome allele files + loci_files = PREPARE_REFERENCE_FILES.out.loci_files // [path, ...] -- per-chromosome loci files + gc_file = PREPARE_REFERENCE_FILES.out.gc_file // [path, ...] -- GC correction ([] if skipped) + rt_file = PREPARE_REFERENCE_FILES.out.rt_file // [path, ...] 
-- RT correction ([] if skipped) // // MODULE: FIBERTOOLSRS_PREDICTM6A @@ -295,39 +317,57 @@ workflow LRSOMATIC { // predict m6a in unaligned bam if (!params.skip_fiber) { + // Fiber-seq processing: predict m6A methylation, call nucleosomes and FIRE elements + // Only applicable to PacBio samples with fiber-seq data (meta.fiber == "y") if (!params.skip_normalfiber){ + // Process all samples (including normals) for fiber-seq ubams = ch_cat_ubams } else { + // Skip fiber-seq processing for normal samples; set aside normals to re-join later ch_cat_ubams .branch { meta, _bams -> normal: meta.type == "normal" tumor: meta.type == "tumor" } .set { ch_cat_ubams_normal_branching } + // ch_cat_ubams_normal_branching.normal: [meta, bam] -- normal samples (held out) + // ch_cat_ubams_normal_branching.tumor: [meta, bam] -- tumor samples only normal_bams = ch_cat_ubams_normal_branching.normal ubams = ch_cat_ubams_normal_branching.tumor } + // Branch by sequencing platform: PacBio needs m6A prediction, ONT does not ubams .branch{ meta, _bams -> pacBio: meta.platform == "pb" ont: meta.platform == "ont" } .set{ch_cat_ubams_pacbio_ont_branching} + // ch_cat_ubams_pacbio_ont_branching.pacBio: [meta, bam] -- PacBio samples + // ch_cat_ubams_pacbio_ont_branching.ont: [meta, bam] -- ONT samples (skip m6A) pacbio_bams = ch_cat_ubams_pacbio_ont_branching.pacBio + // Branch PacBio samples: only those with kinetics tags can have m6A predicted pacbio_bams .branch{meta, _bams -> kinetics: meta.kinetics == "true" noKinetics: meta.kinetics == "false" } .set{pacbio_bams} + // pacbio_bams.kinetics: [meta, bam] -- PacBio with kinetics tags present; m6A predictable + // pacbio_bams.noKinetics: [meta, bam] -- PacBio without kinetics; skip PREDICTM6A if (!params.skip_m6a) { + // + // MODULE: FIBERTOOLSRS_PREDICTM6A (label: process_high) + // Input: [meta, bam] -- PacBio BAM with kinetics tags + // Output: .bam -- [meta, bam] -- BAM with m6A (MM/ML) tags added + // FIBERTOOLSRS_PREDICTM6A (
pacbio_bams.kinetics ) + // Merge PacBio with and without kinetics: both now have (or skip) m6A tags pacbio_bams.noKinetics .mix(FIBERTOOLSRS_PREDICTM6A.out.bam) .set{predicted_bams} @@ -337,22 +377,28 @@ workflow LRSOMATIC { .mix(pacbio_bams.kinetics) .set{predicted_bams} } + // predicted_bams: [meta, bam] -- all PacBio samples (m6A tags present where applicable) - - + // Re-merge ONT and PacBio before fiber-seq branching ch_cat_ubams_pacbio_ont_branching.ont .mix(predicted_bams) .set{fiber_branch} + // fiber_branch (pre-split): [meta, bam] -- all samples (ONT + PacBio, with m6A if applicable) + // Branch on fiber-seq flag: only fiber-seq samples get nucleosome/FIRE calling fiber_branch .branch{ meta, _bams -> fiber: meta.fiber == "y" nonFiber: meta.fiber == "n" } .set{fiber_branch} + // fiber_branch.fiber: [meta, bam] -- fiber-seq samples → nucleosome + FIRE calling + // fiber_branch.nonFiber: [meta, bam] -- non-fiber samples → passed through unchanged // - // MODULE: FIBERTOOLSRS_NUCLEOSOMES + // MODULE: FIBERTOOLSRS_NUCLEOSOMES (label: process_high) + // Input: [meta, bam] -- fiber-seq BAM (with m6A tags for PacBio) + // Output: .bam -- [meta, bam] -- BAM with nucleosome footprint tags added // FIBERTOOLSRS_NUCLEOSOMES ( @@ -360,7 +406,9 @@ workflow LRSOMATIC { ) // - // MODULE: FIBERTOOLSRS_FIRE + // MODULE: FIBERTOOLSRS_FIRE (label: process_high) + // Input: FIBERTOOLSRS_NUCLEOSOMES.out.bam -- [meta, bam] -- BAM with nucleosome tags + // Output: .bam -- [meta, bam] -- BAM with FIRE (Fiber-seq Inferred Regulatory Elements) tags // FIBERTOOLSRS_FIRE ( @@ -368,22 +416,26 @@ workflow LRSOMATIC { ) if (!params.skip_normalfiber){ + // Re-merge fiber and non-fiber samples after FIRE annotation fiber_branch.nonFiber .mix(FIBERTOOLSRS_FIRE.out.bam) .set{ch_cat_ubams} - } else { + // Re-merge fiber, non-fiber, and held-out normal samples fiber_branch.nonFiber .mix(normal_bams) .mix(FIBERTOOLSRS_FIRE.out.bam) .set{ch_cat_ubams} - } + // ch_cat_ubams (updated): [meta, 
bam] -- all samples; fiber-seq samples now carry + // nucleosome + FIRE tags in BAM; m6A tags present for PacBio fiber-seq if(!params.skip_qc) { // - // MODULE: FIBERTOOLSRS_QC + // MODULE: FIBERTOOLSRS_QC (label: process_medium) + // Input: FIBERTOOLSRS_FIRE.out.bam -- [meta, bam] -- annotated fiber-seq BAM + // Output: QC reports for fiber-seq signal (written to outdir) // FIBERTOOLSRS_QC ( @@ -392,10 +444,13 @@ workflow LRSOMATIC { } } // - // MODULE: MINIMAP2_ALIGN + // MODULE: MINIMAP2_ALIGN (label: process_high) + // Input: [meta, bam] -- unaligned BAM (may carry m6A/nucleosome/FIRE tags for fiber-seq) + // ch_fasta -- [[:], fasta] + // sort_bam=true, cigar_paf_format='bai', cigar_bam='', split_prefix='' + // Output: .bam -- [meta, bam] -- coordinate-sorted aligned BAM + // .index -- [meta, bai] -- BAM index // - // Aligns ubams - // ch_cat_ubams: [meta, bam] -- may include m6A/nucleosome/FIRE annotations for fiber-seq samples MINIMAP2_ALIGN ( ch_cat_ubams, @@ -407,25 +462,29 @@ workflow LRSOMATIC { ) MINIMAP2_ALIGN.out.bam .set { ch_minimap_bam } - // [meta, bam] -- aligned BAM - - // ch_minimap_bams into tumor and paired to phase the paired ones on normal - // and add index + // ch_minimap_bam: [meta, bam] -- coordinate-sorted aligned BAM + // Join BAM with its index, then branch into paired-sample vs. 
tumor-only paths ch_minimap_bam .join(MINIMAP2_ALIGN.out.index) .set {ch_index_minimap} + // ch_index_minimap: [meta, bam, bai] -- aligned BAM + index, all samples ch_index_minimap .branch { meta, _bams, _bais -> - paired: meta.paired_data - tumor_only: !meta.paired_data + paired: meta.paired_data // meta.paired_data is the normal sample ID for tumors, or the tumor ID for normals + tumor_only: !meta.paired_data // meta.paired_data is null/false for tumor-only samples } .set { branched_minimap } - // branched_minimap.paired: [meta, bam, bai] -- one item per sample (tumor AND normal flow separately) - // branched_minimap.tumor_only: [meta, bam, bai] + // branched_minimap.paired: [meta, bam, bai] -- tumor AND normal samples flow together here; + // each item is a single sample, joined downstream + // branched_minimap.tumor_only: [meta, bam, bai] -- tumor-only samples (no matched normal) + // SUBWORKFLOW: TUMORONLY_SMALLVAR + // Input: branched_minimap.tumor_only -- [meta, bam, bai] + // Output: .somatic_vcf -- [meta, vcf, tbi] -- somatic SNVs/indels + // .germline_vcf -- [meta, vcf, tbi] -- germline SNVs/indels (ClairS-TO germline output) TUMORONLY_SMALLVAR( branched_minimap.tumor_only, ch_fasta, @@ -436,13 +495,17 @@ workflow LRSOMATIC { branched_minimap.paired .set{paired_ch} + // Split paired samples into tumor and normal streams for joining paired_ch .branch { meta, _bams, _bais -> normal: meta.type == "normal" tumor: meta.type == "tumor" } .set{branched_paired_ch} + // branched_paired_ch.normal: [meta, bam, bai] -- normal samples (meta.type == "normal") + // branched_paired_ch.tumor: [meta, bam, bai] -- tumor samples (meta.type == "tumor") + // Strip 'type' field from normal meta before joining, so the key is just sample ID branched_paired_ch.normal .map{ meta, bam, bai -> def new_meta = meta.subMap('id', @@ -457,7 +520,10 @@ workflow LRSOMATIC { return[new_meta, bam, bai] } .set{paired_normal_bams} + // paired_normal_bams: [meta (no type), normal_bam, 
normal_bai] + // Join tumor and normal BAMs into a single channel for somatic variant calling + // Join key is meta (with 'type' stripped), so tumor meta.id must equal normal meta.id branched_paired_ch.tumor .map{ meta, bam, bai -> def new_meta = meta.subMap('id', @@ -473,12 +539,21 @@ workflow LRSOMATIC { } .join(paired_normal_bams) .set { somatic_smallvar_input } + // somatic_smallvar_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + // SUBWORKFLOW: PAIRED_SMALLVAR_SOMATIC + // Input: somatic_smallvar_input -- [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + // Output: .somatic_vcf -- [meta, vcf, tbi] -- somatic SNVs/indels (ClairS and/or DeepSomatic consensus) PAIRED_SMALLVAR_SOMATIC ( somatic_smallvar_input, ch_fasta, ch_fai ) + + // SUBWORKFLOW: PAIRED_SMALLVAR_GERMLINE + // Input: branched_paired_ch.normal -- [meta, bam, bai] -- normal sample BAMs only + // downloaded_clair3_models -- [meta(id=model_name), model_dir] + // Output: .germline_vcf -- [meta, vcf, tbi] -- germline SNVs/indels (Clair3 and/or DeepVariant consensus) PAIRED_SMALLVAR_GERMLINE ( branched_paired_ch.normal, ch_fasta, @@ -486,15 +561,26 @@ workflow LRSOMATIC { downloaded_clair3_models ) + // Merge germline VCFs from paired and tumor-only paths into a single channel PAIRED_SMALLVAR_GERMLINE.out.germline_vcf .mix(TUMORONLY_SMALLVAR.out.germline_vcf) .set{ch_germline_vcf} + // ch_germline_vcf: [meta, vcf, tbi] -- germline variants for all samples (paired + tumor-only) - + // Merge somatic VCFs from tumor-only and paired T/N paths into a single channel TUMORONLY_SMALLVAR.out.somatic_vcf .mix(PAIRED_SMALLVAR_SOMATIC.out.somatic_vcf) .set{ch_somatic_vcf} - + // ch_somatic_vcf: [meta, vcf, tbi] -- somatic variants for all samples + + // SUBWORKFLOW: PHASING_HAPLOTYPING + // Input: ch_index_minimap -- [meta, bam, bai] -- all aligned BAMs (tumor + normal + tumor-only) + // ch_germline_vcf -- [meta, vcf, tbi] -- germline variants (used to phase reads) + // ch_somatic_vcf -- [meta, 
vcf, tbi] -- somatic variants (get phasing transferred) + // ch_fasta / ch_fai + // Output: .phased_germline_vcf -- [meta, vcf, tbi] -- phased germline VCF + // .phased_somatic_vcf -- [meta, vcf, tbi] -- phased somatic VCF + // .tumor_normal_hapbams_ch -- [meta, bam, bai] -- haplotagged BAMs (all samples) PHASING_HAPLOTYPING ( ch_index_minimap, ch_germline_vcf, @@ -503,13 +589,14 @@ workflow LRSOMATIC { ch_fai ) - + // Prepare phased VCFs for VEP: add empty 'extra' list required by ENSEMBLVEP_VEP PHASING_HAPLOTYPING.out.phased_somatic_vcf .map { meta, vcf, _tbi -> def extra = [] return [meta, vcf, extra] } .set { somatic_vep } + // somatic_vep: [meta, vcf, []] -- phased somatic VCF ready for VEP annotation PHASING_HAPLOTYPING.out.phased_germline_vcf .map { meta, vcf, _tbi -> @@ -517,24 +604,25 @@ workflow LRSOMATIC { return [meta, vcf, extra] } .set { germline_vep } - - /// figure out severus channel structure then test - - // [meta, vcf, []] -- somatic variants merged from T/N and tumor-only paths + // germline_vep: [meta, vcf, []] -- phased germline VCF ready for VEP annotation whatshap_stats_txt = channel.empty() if (!params.skip_qc && !params.skip_whatshapstats) { - // Create channel for whatshap stats + // Drop the empty 'extra' element added for VEP input germline_vep .map { meta, vcf, _extra -> return [meta, vcf] } .set { ch_whatshap_stats } + // ch_whatshap_stats: [meta, vcf] -- phased germline VCF for phasing QC // - // Module: WHATSHAP_STATS + // MODULE: WHATSHAP_STATS (label: process_single) + // Input: [meta, vcf] -- phased VCF (germline) + // gtf=true, sample=true, chr_lengths=false + // Output: .tsv -- [meta, tsv] -- per-chromosome phasing statistics // WHATSHAP_STATS ( @@ -551,7 +639,11 @@ workflow LRSOMATIC { if (!params.skip_vep) { // - // MODULE: GERMLINE_VEP + // MODULE: GERMLINE_VEP (ENSEMBLVEP_VEP alias; label: process_medium) + // Input: germline_vep -- [meta, vcf, []] -- phased germline VCF + // vep_cache -- [[:], cache_dir] + // ch_fasta -- 
[[:], fasta] + // Output: annotated germline VCF with consequence predictions // if (params.vep_custom != null) { vep_custom = file(params.vep_custom) @@ -576,7 +668,11 @@ workflow LRSOMATIC { ) // - // MODULE: SOMATIC_VEP + // MODULE: SOMATIC_VEP (ENSEMBLVEP_VEP alias; label: process_medium) + // Input: somatic_vep -- [meta, vcf, []] -- phased somatic VCF + // vep_cache -- [[:], cache_dir] + // ch_fasta -- [[:], fasta] + // Output: annotated somatic VCF with consequence predictions // SOMATIC_VEP ( @@ -592,6 +688,8 @@ workflow LRSOMATIC { ) } + // Build SEVERUS input by combining tumor-only and T/N paired samples with phased germline VCFs + // Tumor-only samples get empty lists for normal BAM/BAI (SEVERUS runs in tumor-only mode) branched_minimap.tumor_only .map{ meta, bam, bai -> def new_meta = meta.subMap('id', @@ -610,12 +708,19 @@ workflow LRSOMATIC { def normal_bai = [] return [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] } + // Mix with paired T/N input (which already has normal BAM/BAI from somatic_smallvar_input) .mix(somatic_smallvar_input) + // Attach phased germline VCF (used by SEVERUS for phased SV calling) .join(PHASING_HAPLOTYPING.out.phased_germline_vcf) .set{severus_input} + // severus_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_germline_vcf, phased_germline_tbi] + // normal_bam/bai are empty lists [] for tumor-only samples // - // MODULE: SEVERUS + // MODULE: SEVERUS (label: process_high) + // Input: severus_input -- [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf, tbi] + // [[:], bed_file, pon_file] -- optional target BED and panel-of-normals for SV filtering + // Output: .all_vcf -- [meta, vcf] -- all somatic SVs (sniffles2 format) // SEVERUS ( @@ -631,9 +736,14 @@ workflow LRSOMATIC { return [meta, vcf, extra] } .set { sv_vep } - // [meta, severus_all_vcf, []] -- all SVs for VEP annotation + // sv_vep: [meta, severus_all_vcf, []] -- all SVs ready for VEP annotation if(!params.skip_vep) { + // + // MODULE: 
SV_VEP (ENSEMBLVEP_VEP alias; label: process_medium) + // Input: sv_vep -- [meta, vcf, []] -- SEVERUS SV VCF + // Output: annotated SV VCF with consequence predictions + // SV_VEP ( sv_vep, params.vep_genome, @@ -654,7 +764,9 @@ workflow LRSOMATIC { if (!params.skip_qc && !params.skip_cramino) { // - // MODULE: CRAMINO + // MODULE: CRAMINO_POST (label: process_medium) + // Input: ch_minimap_bam -- [meta, bam] -- post-alignment coordinate-sorted BAM + // Output: .arrow -- [meta, arrow_file] -- alignment statistics in feather format // CRAMINO_POST ( ch_minimap_bam ) @@ -662,7 +774,9 @@ workflow LRSOMATIC { if (!params.skip_nanoplot) { // - // Module: Nanoplot + // MODULE: NANOPLOT_POST (label: process_medium) + // Input: CRAMINO_POST.out.arrow -- [meta, arrow_file] + // Output: HTML/txt QC reports (post-alignment) // NANOPLOT_POST(CRAMINO_POST.out.arrow) @@ -681,12 +795,19 @@ workflow LRSOMATIC { if (!params.skip_qc && !params.skip_mosdepth) { - // prepare mosdepth input channel: we need to specify compulsory path to bed as well + // MOSDEPTH requires a BED file argument; pass [] to compute genome-wide depth ch_minimap_bam.join(MINIMAP2_ALIGN.out.index) .map { meta, bam, bai -> [meta, bam, bai, []] } .set { ch_mosdepth_in } - // [meta, bam, bai, []] -- [] is the required empty BED path for MOSDEPTH + // ch_mosdepth_in: [meta, bam, bai, []] -- [] is the optional BED (empty = genome-wide) + // + // MODULE: MOSDEPTH (label: process_medium) + // Input: [meta, bam, bai, bed] -- bed is [] for genome-wide coverage + // ch_fasta -- [[:], fasta] -- used for CRAM decoding (if applicable) + // Output: .global_txt -- [meta, txt] -- global depth summary + // .summary_txt -- [meta, txt] -- per-contig depth summary + // MOSDEPTH ( ch_mosdepth_in, ch_fasta @@ -697,7 +818,12 @@ workflow LRSOMATIC { } // - // SUBWORKFLOW: BAM_STATS_SAMTOOLS + // SUBWORKFLOW: BAM_STATS_SAMTOOLS (nf-core subworkflow) + // Input: [meta, bam, bai] -- aligned BAM with index + // ch_fasta -- [[:], fasta] + 
// Output: .stats -- [meta, txt] -- samtools stats output + // .flagstat -- [meta, txt] -- samtools flagstat output + // .idxstats -- [meta, txt] -- samtools idxstats output // ch_bam_stats = channel.empty() ch_bam_flagstat = channel.empty() @@ -706,7 +832,7 @@ workflow LRSOMATIC { if (!params.skip_qc && !params.skip_bamstats ) { BAM_STATS_SAMTOOLS ( - ch_minimap_bam.join(MINIMAP2_ALIGN.out.index), // Join bam channel with index channel + ch_minimap_bam.join(MINIMAP2_ALIGN.out.index), // [meta, bam, bai] ch_fasta ) @@ -716,16 +842,20 @@ workflow LRSOMATIC { } // - // MODULE: ASCAT + // MODULE: ASCAT (label: process_high) + // Input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] -- NOTE: normal before tumor (ASCAT convention) + // allele_files, loci_files, gc_file, rt_file -- ASCAT reference files + // Output: .png plots, .segments, .purity_ploidy -- copy number results // if (!params.skip_ascat) { + // ASCAT expects [normal, tumor] order; rearrange from severus_input [tumor, normal] order severus_input .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf, _tbi -> return [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] } .set { ascat_ch } - // [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] -- NOTE: normal before tumor (ASCAT convention) + // ascat_ch: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] ASCAT ( ascat_ch, @@ -742,19 +872,25 @@ workflow LRSOMATIC { } // - // MODULE: WAKHAN + // MODULE: WAKHAN (label: process_medium) + // Haplotype-specific somatic copy number aberration and aneuploidy analysis + // Input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_germline_vcf, severus_all_vcf] + // ch_fasta -- [[:], fasta] + // centromere_bed -- BED file of centromere coordinates + // Output: WAKHAN copy number reports (written to outdir) // if (!params.skip_wakhan) { - // Prepare input channel for WAKHAN + // Attach SEVERUS SV VCF to the severus_input channel (dropping the phased TBI) severus_input
.join(SEVERUS.out.all_vcf) .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, _phased_tbi, all_vcf -> return [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, all_vcf] } .set { wakhan_input } - // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, severus_all_vcf] + // wakhan_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_germline_vcf, severus_all_vcf] + // normal_bam/bai are [] for tumor-only samples WAKHAN ( wakhan_input, @@ -764,25 +900,31 @@ workflow LRSOMATIC { } // - // Collate and save software versions + // Collate software versions from two sources: + // 1. ch_versions (classic path): version YAML files emitted by modules + // 2. channel.topic("versions") (topic channel path): version tuples [process, tool, version] + // emitted directly by modules that use the topic-channel pattern // def topic_versions = channel.topic("versions") - .distinct() + .distinct() // deduplicate identical version entries across samples .branch { entry -> - versions_file: entry instanceof Path - versions_tuple: true + versions_file: entry instanceof Path // classic YAML file path + versions_tuple: true // [process, tool, version] tuple } def topic_versions_string = topic_versions.versions_tuple .map { process, tool, version -> + // Strip workflow prefix (everything before the last ':') from process name [ process[process.lastIndexOf(':')+1..-1], " ${tool}: ${version}" ] } - .groupTuple(by:0) + .groupTuple(by:0) // group tool versions by process name .map { process, tool_versions -> tool_versions.unique().sort() "${process}:\n${tool_versions.join('\n')}" } + // topic_versions_string: formatted YAML-like string per process, ready to write + // Merge both version sources and write to versions YAML (consumed by MultiQC) softwareVersionsToYAML(ch_versions.mix(topic_versions.versions_file)) .mix(topic_versions_string) .collectFile( @@ -791,10 +933,14 @@ workflow LRSOMATIC { sort: true, newLine: true ).set { 
ch_collated_versions } + // ch_collated_versions: path -- merged software versions YAML for MultiQC // - // MODULE: MultiQC + // MODULE: MULTIQC (label: process_single) + // Aggregates QC reports from all modules into a single HTML report + // Input: [[id:'multiqc'], [qc_files...], [config_files...], [logo], [], []] + // Output: .report -- [meta, html] -- MultiQC HTML report // summary_params = paramsSummaryMap( workflow, parameters_schema: "nextflow_schema.json") @@ -816,7 +962,8 @@ workflow LRSOMATIC { ) ) - // Collect MultiQC files + // Collect QC outputs from all optional modules + // .collect{it -> it[1]} extracts the file from [meta, file] tuples; ifEmpty([]) handles skipped modules ch_multiqc_files = ch_multiqc_files.mix(ch_bam_stats.collect{it -> it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_bam_flagstat.collect{it -> it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_bam_idxstats.collect{it -> it[1]}.ifEmpty([])) @@ -829,6 +976,7 @@ workflow LRSOMATIC { ch_multiqc_files = ch_multiqc_files.mix(whatshap_stats_txt.collect{it -> it[1]}.ifEmpty([])) + // Build the final MULTIQC input tuple: all QC files + config files + logo MULTIQC ( ch_multiqc_files .collect() @@ -838,6 +986,7 @@ workflow LRSOMATIC { multiqc_config_files += [file(params.multiqc_config, checkIfExists: true)] } def multiqc_logo_file = params.multiqc_logo ? 
[file(params.multiqc_logo, checkIfExists: true)] : [] + // MULTIQC input: [meta, [qc_files], [config_files], [logo], [], []] [[id: 'multiqc'], files, multiqc_config_files, multiqc_logo_file, [], []] } ) From 5cfadfb1ff1651a7e088e9ff46d3897aa76f64b7 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Mon, 30 Mar 2026 12:11:35 +0200 Subject: [PATCH 35/36] prettier --- subworkflows/local/phasing_haplotyping.nf | 2 +- workflows/lrsomatic.nf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/phasing_haplotyping.nf b/subworkflows/local/phasing_haplotyping.nf index e0aaf290..832d196d 100644 --- a/subworkflows/local/phasing_haplotyping.nf +++ b/subworkflows/local/phasing_haplotyping.nf @@ -129,7 +129,7 @@ workflow PHASING_HAPLOTYPING { // fasta / fai // Output: .mod_vcf -- [meta, vcf] -- base modification calls for tumor // - + LONGPHASE_MODCALL_SOMATIC ( tumor_bams_ch, fasta, diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index 10da2149..6cf4cad5 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -258,7 +258,7 @@ workflow LRSOMATIC { // (replicate field removed; bams still a list — concatenated next) // Branch on number of input BAMs: samples with a single BAM skip concatenation - + ch_split = ch_samplesheet_no_rep .branch { _meta, bam -> single: bam.size() == 1 From deefcf928dcfd0ecef8fae63fee6dbc120444484 Mon Sep 17 00:00:00 2001 From: robert-a-forsyth Date: Tue, 31 Mar 2026 17:05:50 +0200 Subject: [PATCH 36/36] documentation, small fixes to config --- conf/base.config | 4 +-- conf/modules.config | 4 +-- modules/local/clair3/main.nf | 7 ++-- .../local/deepsomatic/callvariants/main.nf | 2 +- .../nf-core/deepvariant/callvariants/main.nf | 2 +- .../deepvariant-makeexamples.diff | 8 +++++ .../deepvariant-postprocessvariants.diff | 8 +++++ .../haplotag/longphase-haplotag.diff | 34 +++++++++++++++++++ nextflow_schema.json | 6 ++-- 9 files changed, 63 insertions(+), 12 deletions(-) create mode 100644 
modules/nf-core/longphase/haplotag/longphase-haplotag.diff diff --git a/conf/base.config b/conf/base.config index 08378854..d39f1fce 100644 --- a/conf/base.config +++ b/conf/base.config @@ -31,14 +31,14 @@ process { accelerator = { (params.use_gpu as boolean) ? 1 : null } cpus = { 8 * task.attempt } memory = { 48.GB * task.attempt } - time = { 16.h * task.attempt } + time = { 8.h * task.attempt } } withLabel:process_gpu_very_high { ext.use_gpu = { params.use_gpu as boolean } accelerator = { (params.use_gpu as boolean) ? 1 : null } cpus = { 16 * task.attempt } memory = { 96.GB * task.attempt } - time = { 16.h * task.attempt } + time = { 10.h * task.attempt } } withLabel:process_gpu_very_high_memory { ext.use_gpu = { params.use_gpu as boolean } diff --git a/conf/modules.config b/conf/modules.config index 4fa65136..8a66d956 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -192,10 +192,10 @@ process { meta.platform == 'pb' ? meta.paired_data ? '--channel_list "BASE_CHANNELS,haplotype" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' - : '--channel_list "BASE_CHANNELS,haplotype,allele_frequency" --alt_aligned_pileup "diff_channels" --pileup_image_width "99" --population_vcfs "/opt/models/deepsomatic/pons/AF_pacbio_PON_CoLoRSdb.GRCh38.AF0.05.vcf.gz"' + : '--channel_list "BASE_CHANNELS,haplotype,allele_frequency" --alt_aligned_pileup "diff_channels" --pileup_image_width "99" --population_vcfs "/opt/models/deepsomatic/pons/AF_pacbio_PON_CoLoRSdb.GRCh38.AF0.05.vcf.gz","AF_ilmn_PON_DeepVariant.GRCh38.AF0.05.vcf.gz","PON_dbsnp138_gnomad_ILMN1000g_pon.vcf.gz","PON_dbsnp138_gnomad_PB1000g_pon.vcf.gz"' : meta.paired_data ? 
'--channel_list "BASE_CHANNELS,haplotype" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' - : '--channel_list "BASE_CHANNELS,haplotype,allele_frequency" --alt_aligned_pileup "diff_channels" --pileup_image_width "99" --population_vcfs "/opt/models/deepsomatic/pons/ON_dbsnp138_gnomad_ILMN1000g_pon.vcf.gz"' + : '--channel_list "BASE_CHANNELS,haplotype,allele_frequency" --alt_aligned_pileup "diff_channels" --pileup_image_width "99" --population_vcfs "/opt/models/deepsomatic/pons/AF_pacbio_PON_CoLoRSdb.GRCh38.AF0.05.vcf.gz","AF_ilmn_PON_DeepVariant.GRCh38.AF0.05.vcf.gz","PON_dbsnp138_gnomad_ILMN1000g_pon.vcf.gz","PON_dbsnp138_gnomad_PB1000g_pon.vcf.gz"' } publishDir = [ enabled: false diff --git a/modules/local/clair3/main.nf b/modules/local/clair3/main.nf index 44479086..64e8f8fd 100644 --- a/modules/local/clair3/main.nf +++ b/modules/local/clair3/main.nf @@ -4,8 +4,8 @@ process CLAIR3 { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/clair3:1.2.0--py310h779eee5_0': - 'quay.io/biocontainers/clair3:1.2.0--py310h779eee5_0' }" + (params.use_gpu ? 'docker://hkubal/clair3-gpu:v1.2.0' : 'https://depot.galaxyproject.org/singularity/clair3:1.2.0--py310h779eee5_0') : + (params.use_gpu ? 'docker.io/hkubal/clair3-gpu:v1.2.0' : 'quay.io/biocontainers/clair3:1.2.0--py310h779eee5_0') }" input: tuple val(meta) , path(bam), path(bai), path(model), val(platform) @@ -25,8 +25,10 @@ process CLAIR3 { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + def use_gpu = task.ext.use_gpu as boolean """ + ${use_gpu ? 'export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}' : ':'} run_clair3.sh \\ --bam_fn=${bam} \\ --ref_fn=${reference} \\ @@ -35,6 +37,7 @@ process CLAIR3 { --platform=${platform} \\ --model=${model} \\ --sample_name=${prefix} \\ + ${use_gpu ? 
'--use_gpu --device=cuda:0' : ''} \\ ${args} """ diff --git a/modules/local/deepsomatic/callvariants/main.nf b/modules/local/deepsomatic/callvariants/main.nf index afeb33e1..a2d7bc3e 100644 --- a/modules/local/deepsomatic/callvariants/main.nf +++ b/modules/local/deepsomatic/callvariants/main.nf @@ -3,7 +3,7 @@ process DEEPSOMATIC_CALLVARIANTS { label "${params.use_gpu ? 'process_gpu_high' : 'process_high'}" //Conda is not supported at the moment - container "docker.io/google/deepsomatic:1.7.0" + container params.use_gpu ? "docker.io/google/deepsomatic:1.7.0-gpu" : "docker.io/google/deepsomatic:1.7.0" input: tuple val(meta), path(make_examples_tfrecords) diff --git a/modules/nf-core/deepvariant/callvariants/main.nf b/modules/nf-core/deepvariant/callvariants/main.nf index e0b24884..a8516403 100644 --- a/modules/nf-core/deepvariant/callvariants/main.nf +++ b/modules/nf-core/deepvariant/callvariants/main.nf @@ -4,7 +4,7 @@ process DEEPVARIANT_CALLVARIANTS { label "${params.use_gpu ? 'process_gpu_very_high' : 'process_very_high'}" //Conda is not supported at the moment - container "docker.io/google/deepvariant:1.9.0" + container params.use_gpu ? 
"docker.io/google/deepvariant:1.9.0-gpu" : "docker.io/google/deepvariant:1.9.0" input: tuple val(meta), path(make_examples_tfrecords) diff --git a/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff b/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff index 8e5312b2..ee309ee6 100644 --- a/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff +++ b/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff @@ -3,6 +3,14 @@ Changes in component 'nf-core/deepvariant/makeexamples' Changes in 'deepvariant/makeexamples/main.nf': --- modules/nf-core/deepvariant/makeexamples/main.nf +++ modules/nf-core/deepvariant/makeexamples/main.nf +@@ -1,6 +1,6 @@ + process DEEPVARIANT_MAKEEXAMPLES { + tag "$meta.id" +- label 'process_high' ++ label 'process_very_high' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" @@ -36,6 +36,7 @@ --mode calling \\ --ref "${fasta}" \\ diff --git a/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff b/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff index c7acc49a..c46b2925 100644 --- a/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff +++ b/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff @@ -3,6 +3,14 @@ Changes in component 'nf-core/deepvariant/postprocessvariants' Changes in 'deepvariant/postprocessvariants/main.nf': --- modules/nf-core/deepvariant/postprocessvariants/main.nf +++ modules/nf-core/deepvariant/postprocessvariants/main.nf +@@ -1,6 +1,6 @@ + process DEEPVARIANT_POSTPROCESSVARIANTS { + tag "$meta.id" +- label 'process_medium' ++ label 'process_high' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" @@ -64,6 +64,7 @@ --outfile "${prefix}.vcf.gz" \\ --nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\ diff --git 
a/modules/nf-core/longphase/haplotag/longphase-haplotag.diff b/modules/nf-core/longphase/haplotag/longphase-haplotag.diff new file mode 100644 index 00000000..0d93067d --- /dev/null +++ b/modules/nf-core/longphase/haplotag/longphase-haplotag.diff @@ -0,0 +1,34 @@ +Changes in component 'nf-core/longphase/haplotag' +'modules/nf-core/longphase/haplotag/meta.yml' is unchanged +Changes in 'longphase/haplotag/main.nf': +--- modules/nf-core/longphase/haplotag/main.nf ++++ modules/nf-core/longphase/haplotag/main.nf +@@ -42,6 +42,11 @@ + if [ -f "${prefix}.out" ]; then + mv ${prefix}.out ${prefix}.log + fi ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') ++ END_VERSIONS + """ + + stub: +@@ -52,5 +57,10 @@ + """ + touch ${prefix}.${suffix} + ${log} ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') ++ END_VERSIONS + """ +-} ++} +'modules/nf-core/longphase/haplotag/environment.yml' is unchanged +'modules/nf-core/longphase/haplotag/tests/main.nf.test' is unchanged +'modules/nf-core/longphase/haplotag/tests/main.nf.test.snap' is unchanged +'modules/nf-core/longphase/haplotag/tests/nextflow.config' is unchanged +************************************************************ diff --git a/nextflow_schema.json b/nextflow_schema.json index eceb2aaf..06a0603b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -66,7 +66,6 @@ "germline_var_keep": { "type": "array", "description": "List of germline variant callers to use. Must include at least one of [deepvariant, clair].", - "default": ["deepvariant", "clair"], "items": { "type": "string", "enum": ["deepvariant", "clair"] @@ -76,7 +75,6 @@ "somatic_var_keep": { "type": "array", "description": "List of somatic variant callers to use. 
Must include at least one of [deepsomatic, clair].", - "default": ["deepsomatic", "clair"], "items": { "type": "string", "enum": ["deepsomatic", "clair"] @@ -371,7 +369,7 @@ "use_gpu": { "type": "boolean", "default": false, - "description": "Use GPU for supported tools (e.g. DeepVariant, DeepSomatic)" + "description": "Use GPU for supported tools (e.g. DeepVariant, DeepSomatic, Clair3)" } } }, @@ -522,7 +520,7 @@ "hidden": true }, "help": { - "type": ["boolean", "string"], + "type": "boolean", "default": false, "description": "Display the help message." },