diff --git a/.gitignore b/.gitignore index 9e307203..c5c144e3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ testing* null/ .nf-test .nf-test.log +out/ diff --git a/conf/base.config b/conf/base.config index 84625158..d39f1fce 100644 --- a/conf/base.config +++ b/conf/base.config @@ -26,6 +26,26 @@ process { // adding in your local modules too. // TODO nf-core: Customise requirements for specific processes. // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors + withLabel:process_gpu_high { + ext.use_gpu = { params.use_gpu as boolean } + accelerator = { (params.use_gpu as boolean) ? 1 : null } + cpus = { 8 * task.attempt } + memory = { 48.GB * task.attempt } + time = { 8.h * task.attempt } + } + withLabel:process_gpu_very_high { + ext.use_gpu = { params.use_gpu as boolean } + accelerator = { (params.use_gpu as boolean) ? 1 : null } + cpus = { 16 * task.attempt } + memory = { 96.GB * task.attempt } + time = { 10.h * task.attempt } + } + withLabel:process_gpu_very_high_memory { + ext.use_gpu = { params.use_gpu as boolean } + accelerator = { (params.use_gpu as boolean) ? 1 : null } + cpus = { 16 * task.attempt } + memory = { 128.GB * task.attempt } + } withLabel:process_single { cpus = { 1 } memory = { 6.GB * task.attempt } @@ -64,8 +84,4 @@ process { errorStrategy = 'retry' maxRetries = 2 } - withLabel: process_gpu { - ext.use_gpu = { workflow.profile.contains('gpu') } - accelerator = { workflow.profile.contains('gpu') ? 
1 : null } - } } diff --git a/conf/igenomes.config b/conf/igenomes.config index 06bd28a4..d9c42c81 100644 --- a/conf/igenomes.config +++ b/conf/igenomes.config @@ -43,6 +43,7 @@ params { dbsnp = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/final_dbsnp.vcf.gz" onekgenomes = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/final_1kgenomes.vcf.gz" colors = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/final_colors.vcf.gz" + asap = "${params.igenomes_base}/Homo_sapiens/ClairSTO/CHM13/Annotation/ClairSTO-pon/WGS_CHM13_ASAP.vcf.gz" } 'GRCm38' { fasta = "${params.igenomes_base}/Mus_musculus/Ensembl/GRCm38/Sequence/WholeGenomeFasta/genome.fa" diff --git a/conf/modules.config b/conf/modules.config index 76685f9a..8a66d956 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -116,6 +116,121 @@ process { ] } + withName: '.*:BCFTOOLS_NORM' { + ext.prefix = { "${meta.id}.${meta.caller}_norm" } + ext.args = { + "-Oz -W=tbi" + } + publishDir = [ + enabled: false + ] + } + + withName: '.*:BCFTOOLS_ISEC' { + ext.prefix = { "${meta.id}_isec" } + ext.args ={ + "-Oz" + } + publishDir = [ + enabled: false + ] + } + withName: '.*:BCFTOOLS_ANNOTATE' { + ext.prefix = { "${meta.id}.${meta.caller}" } + ext.args = { + '''-h <(echo '##INFO=') \ + -c CHROM,POS,REF,ALT,INFO/CALLER \ + -Oz \ + -W=tbi''' + } + publishDir = [ + enabled: false + ] + } + withName: '.*:BCFTOOLS_QUERY' { + ext.args = { + "-f '%CHROM\t%POS\t%REF\t%ALT\t${meta.caller}\n'" + } + publishDir = [ + enabled: false + ] + } + + withName: '.*DEEPVARIANT_MAKEEXAMPLES' { + ext.args = { + meta.platform == 'pb' + ? 
'--channel_list "BASE_CHANNELS,haplotype,base_6ma" --alt_aligned_pileup "diff_channels" --pileup_image_width "147"' + : '--channel_list "BASE_CHANNELS,haplotype" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' + } + publishDir = [ + enabled: false + ] + } + + withName: '.*DEEPVARIANT_POSTPROCESSVARIANTS' { + publishDir = [ + path: { "${params.outdir}/${meta.id}/variants/deepvariant" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + (filename.equals('versions.yml') || filename.endsWith('.g.vcf.gz') || filename.endsWith('.g.vcf.gz.tbi')) ? null : filename + } + ] + + } + + withName: '.*DEEPVARIANT_CALLVARIANTS' { + ext.args = { + meta.platform == 'pb' ? ("--checkpoint '/opt/models/pacbio' ") : ("--checkpoint '/opt/models/ont_r104'") + } + publishDir = [ + enabled : false + ] + } + + withName: '.*DEEPSOMATIC_MAKEEXAMPLES' { + ext.args = { + meta.platform == 'pb' + ? meta.paired_data + ? '--channel_list "BASE_CHANNELS,haplotype" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' + : '--channel_list "BASE_CHANNELS,haplotype,allele_frequency" --alt_aligned_pileup "diff_channels" --pileup_image_width "99" --population_vcfs "/opt/models/deepsomatic/pons/AF_pacbio_PON_CoLoRSdb.GRCh38.AF0.05.vcf.gz","AF_ilmn_PON_DeepVariant.GRCh38.AF0.05.vcf.gz","PON_dbsnp138_gnomad_ILMN1000g_pon.vcf.gz","PON_dbsnp138_gnomad_PB1000g_pon.vcf.gz"' + : meta.paired_data + ? 
'--channel_list "BASE_CHANNELS,haplotype" --alt_aligned_pileup "diff_channels" --pileup_image_width "99"' + : '--channel_list "BASE_CHANNELS,haplotype,allele_frequency" --alt_aligned_pileup "diff_channels" --pileup_image_width "99" --population_vcfs "/opt/models/deepsomatic/pons/AF_pacbio_PON_CoLoRSdb.GRCh38.AF0.05.vcf.gz","AF_ilmn_PON_DeepVariant.GRCh38.AF0.05.vcf.gz","PON_dbsnp138_gnomad_ILMN1000g_pon.vcf.gz","PON_dbsnp138_gnomad_PB1000g_pon.vcf.gz"' + } + publishDir = [ + enabled: false + ] + } + + withName: '.*DEEPSOMATIC_POSTPROCESSVARIANTS' { + ext.args = { + '--process_somatic=true' + } + publishDir = [ + path: { "${params.outdir}/${meta.id}/variants/deepsomatic" }, + mode: params.publish_dir_mode, + saveAs: { filename -> + (filename.equals('versions.yml') || filename.endsWith('.g.vcf.gz') || filename.endsWith('.g.vcf.gz.tbi')) ? null : filename + } + ] + } + + withName: '.*DEEPSOMATIC_CALLVARIANTS' { + ext.args = { + meta.platform == 'pb' + ? (meta.paired_data + ? "--checkpoint '/opt/models/deepsomatic/pacbio'" + : "--checkpoint '/opt/models/deepsomatic/pacbio_tumor_only'" ) + : (meta.paired_data + ? "--checkpoint '/opt/models/deepsomatic/ont'" + : "--checkpoint '/opt/models/deepsomatic/ont_tumor_only'") + } + publishDir = [ + enabled : false + ] + } + + withName: '.*:UNZIP_.*' { publishDir = [ enabled: false @@ -161,7 +276,7 @@ process { // withName: '.*:MINIMAP2_ALIGN' { - ext.prefix = { "${meta.id}_mapped" } + ext.prefix = { "${meta.id}_${meta.type}_mapped" } ext.args = { [ meta.platform == 'pb' ? ( params.minimap2_pb_model ? "-ax $params.minimap2_pb_model" : "-ax map-hifi" ) : @@ -218,7 +333,7 @@ process { // Phasing processes // - withName: '.*:LONGPHASE_PHASE' { + withName: '.*:LONGPHASE_PHASE_GERMLINE' { ext.prefix = { "germline_smallvariants" } ext.args = { [ @@ -232,8 +347,21 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } ] } - - withName: '.*:LONGPHASE_HAPLOTAG' { + withName: '.*:LONGPHASE_PHASE_SOMATIC' { + ext.prefix = { "somatic_smallvariants" } + ext.args = { + [ + meta.platform == 'pb' ? '--pb' : '--ont', + "--indels", + ].join(' ').trim() + } + publishDir = [ + path: { "${params.outdir}/${meta.id}/variants/phased" }, + mode: params.publish_dir_mode, + saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + ] + } + withName: '.*:LONGPHASE_HAPLOTAG.*' { ext.prefix = { "${meta.id}_${meta.type}" } publishDir = [ path: { "${params.outdir}/${meta.id}/bamfiles" }, @@ -241,6 +369,11 @@ process { saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] } + withName: '.*:LONGPHASE_MODCALL.*' { + publishDir = [ + enabled: false + ] + } // // Structural variant calling processes @@ -260,14 +393,35 @@ process { // Small variant calling processes // withName: '.*:BCFTOOLS_CONCAT' { - ext.args = '--output-type z -a' + ext.args = {'-Oz -a -W=tbi'} publishDir = [ enabled: false ] } withName: '.*:BCFTOOLS_SORT' { ext.prefix = { "${meta.id}_sorted" } - ext.arge = '--output-type z' + ext.args = {'-Oz -W=tbi'} + publishDir = [ + enabled: false + ] + } + withName: '.*:GERMLINE_CONSENSUS:BCFTOOLS_SORT' { + ext.prefix = { "${meta.id}_germline_sorted" } + ext.args = {'-Oz -W=tbi'} + publishDir = [ + enabled: false + ] + } + withName: '.*:SOMATIC_CONSENSUS:BCFTOOLS_SORT' { + ext.prefix = { "${meta.id}_somatic_sorted" } + ext.args = {'-Oz -W=tbi'} + publishDir = [ + enabled: false + ] + } + withName: '.*:PAIRED_SMALLVAR_SOMATIC:BCFTOOLS_SORT' { + ext.prefix = { "${meta.id}_somatic_sorted" } + ext.args = {'-Oz -W=tbi'} publishDir = [ enabled: false ] diff --git a/conf/test.config b/conf/test.config index 416ed610..e7014818 100644 --- a/conf/test.config +++ b/conf/test.config @@ -18,6 +18,29 @@ process { time: '1.h' ] } + + withName: '.*DEEPVARIANT_MAKEEXAMPLES' { + ext.args = { + "--regions 'chr19'" + } + } + + withName: 
'.*DEEPVARIANT_POSTPROCESSVARIANTS' { + ext.args = { + "--regions 'chr19'" + } + } + withName: '.*DEEPSOMATIC_MAKEEXAMPLES' { + ext.args = { + "--regions 'chr19'" + } + } + withName: '.*DEEPSOMATIC_POSTPROCESSVARIANTS' { + ext.args = { + "--regions 'chr19'" + } + } + } params { @@ -29,7 +52,7 @@ params { fasta = "https://raw.githubusercontent.com/IntGenomicsLab/test-datasets/main/references/GRCh38_chr19.fasta.gz" // Additional params - genome = "GRCh38" + genome = "CHM13" vep_genome = "WBcel235" vep_species = "caenorhabditis_elegans" skip_wakhan = true diff --git a/modules.json b/modules.json index 5d114f9c..4b49d95a 100644 --- a/modules.json +++ b/modules.json @@ -11,16 +11,39 @@ "installed_by": ["modules"], "patch": "modules/nf-core/ascat/ascat.diff" }, + "bcftools/annotate": { + "branch": "master", + "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", + "installed_by": ["modules"] + }, "bcftools/concat": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", "installed_by": ["modules", "vcf_gather_bcftools"] }, + "bcftools/isec": { + "branch": "master", + "git_sha": "3b2c3559699a7bca6a7c2b220695a072e030e17d", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/isec/bcftools-isec.diff" + }, "bcftools/merge": { "branch": "master", "git_sha": "3d9c2f4beaa4f62b3f006928fd9095a496d1e5a8", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/merge/bcftools-merge.diff" + }, + "bcftools/norm": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", "installed_by": ["modules"] }, + "bcftools/query": { + "branch": "master", + "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", + "installed_by": ["modules"], + "patch": "modules/nf-core/bcftools/query/bcftools-query.diff" + }, "bcftools/sort": { "branch": "master", "git_sha": "6383d8fe58f9498eecd5aa303e71a4a932d1e9f6", @@ -30,17 +53,20 @@ "deepvariant/callvariants": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", 
- "installed_by": ["deepvariant"] + "installed_by": ["deepvariant"], + "patch": "modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff" }, "deepvariant/makeexamples": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": ["deepvariant"] + "installed_by": ["deepvariant"], + "patch": "modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff" }, "deepvariant/postprocessvariants": { "branch": "master", "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", - "installed_by": ["deepvariant"] + "installed_by": ["deepvariant"], + "patch": "modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff" }, "ensemblvep/download": { "branch": "master", @@ -55,12 +81,13 @@ }, "longphase/haplotag": { "branch": "master", - "git_sha": "e753770db613ce014b3c4bc94f6cba443427b726", - "installed_by": ["modules"] + "git_sha": "b8d30a43f33aee3148b0e9e9f00587984a4ac195", + "installed_by": ["modules"], + "patch": "modules/nf-core/longphase/haplotag/longphase-haplotag.diff" }, "longphase/phase": { "branch": "master", - "git_sha": "47983538e45e539f783ed8ab0d1c96d39df2af8f", + "git_sha": "b8d30a43f33aee3148b0e9e9f00587984a4ac195", "installed_by": ["modules"], "patch": "modules/nf-core/longphase/phase/longphase-phase.diff" }, @@ -166,6 +193,12 @@ "git_sha": "7ac6cbe7c17c2dad685da7f70496c8f48ea48687", "installed_by": ["subworkflows"] }, + "deepvariant": { + "branch": "master", + "git_sha": "f2b138ee1d91f67d31c187317d7e83e429bf0309", + "installed_by": ["subworkflows"], + "patch": "subworkflows/nf-core/deepvariant/deepvariant.diff" + }, "utils_nextflow_pipeline": { "branch": "master", "git_sha": "05954dab2ff481bcb999f24455da29a5828af08d", diff --git a/modules/local/clair3/main.nf b/modules/local/clair3/main.nf index 6619ef14..64e8f8fd 100644 --- a/modules/local/clair3/main.nf +++ b/modules/local/clair3/main.nf @@ -1,11 +1,11 @@ process CLAIR3 { tag "$meta.id" - label 'process_very_high' + label 
"${params.use_gpu ? 'process_gpu_very_high' : 'process_very_high'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/clair3:1.2.0--py310h779eee5_0': - 'quay.io/biocontainers/clair3:1.2.0--py310h779eee5_0' }" + (params.use_gpu ? 'docker://hkubal/clair3-gpu:v1.2.0' : 'https://depot.galaxyproject.org/singularity/clair3:1.2.0--py310h779eee5_0') : + (params.use_gpu ? 'docker.io/hkubal/clair3-gpu:v1.2.0' : 'quay.io/biocontainers/clair3:1.2.0--py310h779eee5_0') }" input: tuple val(meta) , path(bam), path(bai), path(model), val(platform) @@ -24,15 +24,21 @@ process CLAIR3 { script: def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def use_gpu = task.ext.use_gpu as boolean + """ + ${use_gpu ? 'export CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-0}' : ':'} run_clair3.sh \\ - --bam_fn=$bam \\ - --ref_fn=$reference \\ - --threads=$task.cpus \\ + --bam_fn=${bam} \\ + --ref_fn=${reference} \\ + --threads=${task.cpus} \\ --output=. \\ - --platform=$platform \\ - --model=$model \\ - $args + --platform=${platform} \\ + --model=${model} \\ + --sample_name=${prefix} \\ + ${use_gpu ? '--use_gpu --device=cuda:0' : ''} \\ + ${args} """ stub: diff --git a/modules/local/clairs/main.nf b/modules/local/clairs/main.nf index d683c777..a7a310b5 100644 --- a/modules/local/clairs/main.nf +++ b/modules/local/clairs/main.nf @@ -20,6 +20,7 @@ process CLAIRS { task.ext.when == null || task.ext.when script: + def prefix = task.ext.prefix ?: "${meta.id}" def args = task.ext.args ?: '' """ @@ -29,6 +30,7 @@ process CLAIRS { --ref_fn $reference \\ --threads $task.cpus \\ --platform $model \\ + --sample_name ${prefix} \\ --output_dir . 
\\ --output_prefix snvs \\ $args diff --git a/modules/local/clairsto/main.nf b/modules/local/clairsto/main.nf index 9071487d..7147061e 100644 --- a/modules/local/clairsto/main.nf +++ b/modules/local/clairsto/main.nf @@ -8,13 +8,9 @@ process CLAIRSTO { 'docker.io/hkubal/clairs-to:v0.4.2' }" input: - tuple val(meta), path(tumor_bam), path(tumor_bai), val(model) + tuple val(meta), path(tumor_bam), path(tumor_bai), val(model), path(pon_vcfs), val(pon_flags) tuple val(meta2), path(reference) tuple val(meta3), path(index) - path(dbSNP) - path(colors) - path(onekgenomes) - path(gnomad) output: tuple val(meta), path("indel.vcf.gz"), emit: indel_vcf @@ -28,11 +24,10 @@ process CLAIRSTO { script: def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" def conda_prefix = workflow.containerEngine == 'singularity' ? '--conda_prefix /opt/micromamba/envs/clairs-to' : '' - def gnomad_arg = gnomad ?: 'gnomad.r2.1.af-ge-0.001.sites.vcf.gz' - def dbSNP_arg = dbSNP ?: 'dbsnp.b138.non-somatic.sites.vcf.gz' - def onekgenomes_arg = onekgenomes ?: '1000g-pon.sites.vcf.gz' - def colors_arg = colors ?: 'colors-pon.sites.vcf.gz' + def pon_string = pon_vcfs.join(',') + def flags_string = pon_flags.join(',') """ /opt/bin/run_clairs_to \ @@ -41,8 +36,9 @@ process CLAIRSTO { --platform $model \\ --threads $task.cpus \\ --output_dir . \\ - --panel_of_normals "${gnomad_arg},${dbSNP_arg},${onekgenomes_arg},${colors_arg}" \\ - --panel_of_normals_require_allele_matching 'True,True,False,False' \\ + --sample_name ${prefix} \\ + --panel_of_normals ${pon_string} \\ + --panel_of_normals_require_allele_matching ${flags_string} \\ $conda_prefix \\ $args \\ """ diff --git a/modules/local/deepsomatic/callvariants/main.nf b/modules/local/deepsomatic/callvariants/main.nf new file mode 100644 index 00000000..a2d7bc3e --- /dev/null +++ b/modules/local/deepsomatic/callvariants/main.nf @@ -0,0 +1,49 @@ +process DEEPSOMATIC_CALLVARIANTS { + tag "$meta.id" + label "${params.use_gpu ? 
'process_gpu_high' : 'process_high'}" + + //Conda is not supported at the moment + container params.use_gpu ? "docker.io/google/deepsomatic:1.7.0-gpu" : "docker.io/google/deepsomatic:1.7.0" + + input: + tuple val(meta), path(make_examples_tfrecords) + + output: + tuple val(meta), path("${prefix}.call-*-of-*.tfrecord.gz") , emit: call_variants_tfrecords + tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def matcher = make_examples_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + make_examples_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def examples_tfrecord_name = matcher[0][1] + def shardCount = matcher[0][2] + // Reconstruct the logical name - ${tfrecord_name}@.gz + def examples_tfrecords_logical_name = "${examples_tfrecord_name}@${shardCount}.gz" + + """ + /opt/deepvariant/bin/call_variants \\ + ${args} \\ + --outfile "${prefix}.call.tfrecord.gz" \\ + --examples "${examples_tfrecords_logical_name}" + + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.call-00000-of-00001.tfrecord.gz + + """ +} diff --git a/modules/local/deepsomatic/makeexamples/main.nf b/modules/local/deepsomatic/makeexamples/main.nf new file mode 100644 index 00000000..f33300ab --- /dev/null +++ b/modules/local/deepsomatic/makeexamples/main.nf @@ -0,0 +1,58 @@ +process DEEPSOMATIC_MAKEEXAMPLES { + tag "$meta.id" + label 'process_high' + + //Conda is not supported at the moment + 
container "docker.io/google/deepsomatic:1.7.0" + + input: + tuple val(meta), path(normal_input), path(normal_index), path(tumor_input), path(tumor_index) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + + output: + tuple val(meta), path("${prefix}.examples.tfrecord-*-of-*.gz{,.example_info.json}") , emit: examples + tuple val(meta), path("${prefix}.gvcf.tfrecord-*-of-*.gz") , emit: gvcf + tuple val(meta), path("${prefix}_call_variant_outputs.tfrecord-*-of-*.gz", arity: "0..*") , emit: small_model_calls + tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def normalReadsArg = (normal_input?.toString() && normal_input.toString() != '[]') ? "--reads_normal \"${normal_input}\"" : "" + def normalSampleArg = (normal_input?.toString() && normal_input.toString() != '[]') ? 
"--sample_name_normal \"${prefix}_normal\"" : "" + + """ + seq 0 ${task.cpus - 1} | parallel -q --halt 2 --line-buffer /opt/deepvariant/bin/make_examples_somatic \\ + --mode calling \\ + --ref "${fasta}" \\ + --reads_tumor "${tumor_input}" \\ + ${normalReadsArg} \\ + --sample_name_tumor "${prefix}" \\ + ${normalSampleArg} \\ + --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ + --gvcf "./${prefix}.gvcf.tfrecord@${task.cpus}.gz" \\ + ${args} \\ + --task {} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + printf -v SHARD_COUNT "%04d" ${task.cpus} + for i in \$( seq -f "%04g" 0 ${task.cpus-1} ) + do + echo "" | gzip > ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.gz + touch ${prefix}.examples.tfrecord-\$i-of-\$SHARD_COUNT.gz.example_info.json + echo "" | gzip > ${prefix}.gvcf.tfrecord-\$i-of-\$SHARD_COUNT.gz + done + """ +} diff --git a/modules/local/deepsomatic/postprocessvariants/main.nf b/modules/local/deepsomatic/postprocessvariants/main.nf new file mode 100644 index 00000000..a192b57f --- /dev/null +++ b/modules/local/deepsomatic/postprocessvariants/main.nf @@ -0,0 +1,87 @@ +process DEEPSOMATIC_POSTPROCESSVARIANTS { + tag "$meta.id" + label 'process_medium' + + //Conda is not supported at the moment + container "docker.io/google/deepsomatic:1.7.0" + + input: + tuple val(meta), path(variant_calls_tfrecord_files), path(gvcf_tfrecords), val(small_model_calls), val(intervals) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + tuple val(meta4), path(gzi) + + output: + tuple val(meta), path("${prefix}.vcf.gz") , emit: vcf + tuple val(meta), path("${prefix}.vcf.gz.{tbi,csi}") , emit: vcf_index + tuple val(meta), path("${prefix}.g.vcf.gz") , emit: gvcf + tuple val(meta), path("${prefix}.g.vcf.gz.{tbi,csi}") , emit: gvcf_index + tuple val("${task.process}"), val('deepsomatic'), val('1.7.0'), topic: versions, emit: versions_deepsomatic + + when: + task.ext.when == null || task.ext.when + + script: + // Exit if running this 
module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + + def regions = intervals ? "--regions ${intervals}" : "" + def variant_calls_tfrecord_name = variant_calls_tfrecord_files[0].name.replaceFirst(/-\d{5}-of-\d{5}/, "") + + def gvcf_matcher = gvcf_tfrecords[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/ + if (!gvcf_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + gvcf_tfrecords[0].baseName + "' doesn't match the expected pattern") + } + def gvcf_tfrecord_name = gvcf_matcher[0][1] + def gvcf_shardCount = gvcf_matcher[0][2] + // Reconstruct the logical name - ${tfrecord_name}.examples.tfrecord@${task.cpus}.gz + def gvcf_tfrecords_logical_name = "${gvcf_tfrecord_name}@${gvcf_shardCount}.gz" + + // The following block determines whether the small model was used, and if so, adds the variant calls from it + // to the argument --small_model_cvo_records. + def small_model_arg = "" + if (small_model_calls && small_model_calls.size() > 0) { + def small_model_matcher = (small_model_calls[0].baseName =~ /^(.+)-\d{5}-of-(\d{5})$/) + if (!small_model_matcher.matches()) { + throw new IllegalArgumentException("tfrecord baseName '" + small_model_calls[0].baseName + "' doesn't match the expected pattern") + } + def small_model_tfrecord_name = small_model_matcher[0][1] + def small_model_shardCount = small_model_matcher[0][2] + // Reconstruct the logical name. 
Example: test_call_variant_outputs.examples.tfrecord@12.gz + def small_model_tfrecords_logical_name = "${small_model_tfrecord_name}@${small_model_shardCount}.gz" + small_model_arg = "--small_model_cvo_records ${small_model_tfrecords_logical_name}" + } + + """ + /opt/deepvariant/bin/postprocess_variants \\ + ${args} \\ + --ref "${fasta}" \\ + --infile "${variant_calls_tfrecord_name}" \\ + --outfile "${prefix}.vcf.gz" \\ + --process_somatic=true \\ + --nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\ + --gvcf_outfile "${prefix}.g.vcf.gz" \\ + ${regions} \\ + ${small_model_arg} \\ + --cpus $task.cpus + + """ + + stub: + // Exit if running this module with -profile conda / -profile mamba + if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) { + error "DEEPSOMATIC module does not support Conda. Please use Docker / Singularity / Podman instead." + } + prefix = task.ext.prefix ?: "${meta.id}" + """ + echo "" | gzip > ${prefix}.vcf.gz + touch ${prefix}.vcf.gz.tbi + echo "" | gzip > ${prefix}.g.vcf.gz + touch ${prefix}.g.vcf.gz.tbi + + """ +} diff --git a/modules/local/fibertoolsrs/fire/main.nf b/modules/local/fibertoolsrs/fire/main.nf index 1240f0d5..eed76d97 100644 --- a/modules/local/fibertoolsrs/fire/main.nf +++ b/modules/local/fibertoolsrs/fire/main.nf @@ -1,7 +1,7 @@ process FIBERTOOLSRS_FIRE { tag "$meta.id" label 'process_very_high' - label 'process_high_memory' + label "${params.use_gpu ? 'process_gpu_very_high_memory' : 'process_high_memory'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/local/fibertoolsrs/nucleosomes/main.nf b/modules/local/fibertoolsrs/nucleosomes/main.nf index 33a6c5ea..db42d106 100644 --- a/modules/local/fibertoolsrs/nucleosomes/main.nf +++ b/modules/local/fibertoolsrs/nucleosomes/main.nf @@ -1,7 +1,7 @@ process FIBERTOOLSRS_NUCLEOSOMES { tag "$meta.id" label 'process_very_high' - label 'process_high_memory' + label "${params.use_gpu ? 'process_gpu_very_high_memory' : 'process_high_memory'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? diff --git a/modules/local/fibertoolsrs/predictm6a/main.nf b/modules/local/fibertoolsrs/predictm6a/main.nf index 5aa174b0..bb355bfe 100644 --- a/modules/local/fibertoolsrs/predictm6a/main.nf +++ b/modules/local/fibertoolsrs/predictm6a/main.nf @@ -1,7 +1,7 @@ process FIBERTOOLSRS_PREDICTM6A { tag "$meta.id" label 'process_very_high' - label 'process_high_memory' + label "${params.use_gpu ? 'process_gpu_very_high_memory' : 'process_high_memory'}" conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
diff --git a/modules/local/longphase/modcall/environment.yml b/modules/local/longphase/modcall/environment.yml new file mode 100644 index 00000000..f436bdae --- /dev/null +++ b/modules/local/longphase/modcall/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda + +dependencies: + - bioconda::htslib=1.23.1 + - bioconda::longphase=2.0.1 diff --git a/modules/local/longphase/modcall/main.nf b/modules/local/longphase/modcall/main.nf new file mode 100644 index 00000000..45880aba --- /dev/null +++ b/modules/local/longphase/modcall/main.nf @@ -0,0 +1,61 @@ +process LONGPHASE_MODCALL { + tag "$meta.id" + label 'process_high' + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/83fce1d397cf71705cc096fc0e0e52f7013bdd471ef68ee53ae765688e5c439c/data': + 'community.wave.seqera.io/library/longphase_samtools:8c61296cae7a5fc0' }" + + input: + tuple val(meta), path(bam), path(bai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) + + + output: + tuple val(meta), path("*.vcf") , emit: mod_vcf + tuple val(meta), path("*.log") , emit: log , optional: true + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + + """ + longphase \\ + modcall \\ + $args \\ + --threads ${task.cpus} \\ + -o ${prefix} \\ + --reference ${fasta} \\ + -b ${bam} \\ + --out-prefix ${prefix} + + if [ -f "${prefix}.out" ]; then + mv ${prefix}.out ${prefix}.log + fi + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ + + stub: + def args = task.ext.args ?: '' + def prefix = 
task.ext.prefix ?: "${meta.id}" + def log = args.contains('--log') ? "touch ${prefix}.log" : '' + """ + touch ${prefix}.vcf + ${log} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ +} diff --git a/modules/local/vcfsplit/main.nf b/modules/local/vcfsplit/main.nf index df7ce318..f6156d34 100644 --- a/modules/local/vcfsplit/main.nf +++ b/modules/local/vcfsplit/main.nf @@ -31,8 +31,8 @@ process VCFSPLIT { bcftools concat -a -Oz -o somatic.vcf.gz indels_pass.vcf.gz snv_pass.vcf.gz tabix -p vcf somatic.vcf.gz - bcftools view -i 'FILTER="NonSomatic"' $indel_vcf | bgzip -c > indels_filtered.vcf.gz - bcftools view -i 'FILTER="NonSomatic"' $snv_vcf | bgzip -c > snv_filtered.vcf.gz + bcftools view -i 'FILTER~"NonSomatic" || INFO/Verdict_Germline=1' $indel_vcf | bgzip -c > indels_filtered.vcf.gz + bcftools view -i 'FILTER~"NonSomatic" || INFO/Verdict_Germline=1' $snv_vcf | bgzip -c > snv_filtered.vcf.gz tabix -p vcf indels_filtered.vcf.gz tabix -p vcf snv_filtered.vcf.gz bcftools concat -a -Oz -o germline_tmp.vcf.gz indels_filtered.vcf.gz snv_filtered.vcf.gz diff --git a/modules/nf-core/bcftools/annotate/environment.yml b/modules/nf-core/bcftools/annotate/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/annotate/main.nf b/modules/nf-core/bcftools/annotate/main.nf new file mode 100644 index 00000000..18778cc2 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/main.nf @@ -0,0 +1,81 @@ +process 
BCFTOOLS_ANNOTATE { + tag "${meta.id}" + label 'process_low' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(input), path(index), path(annotations), path(annotations_index), path(columns), path(header_lines), path(rename_chrs) + + output: + tuple val(meta), path("${prefix}.${extension}"), emit: vcf + tuple val(meta), path("${prefix}.${extension}.tbi"), emit: tbi, optional: true + tuple val(meta), path("${prefix}.${extension}.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def annotations_file = annotations ? "--annotations ${annotations}" : '' + def columns_file = columns ? "--columns-file ${columns}" : '' + def header_file = header_lines ? "--header-lines ${header_lines}" : '' + def rename_chrs_file = rename_chrs ? "--rename-chrs ${rename_chrs}" : '' + extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : "vcf" + def index_command = !index ? 
"bcftools index ${input}" : '' + + if ("${input}" == "${prefix}.${extension}") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + """ + ${index_command} + + bcftools \\ + annotate \\ + ${args} \\ + ${annotations_file} \\ + ${columns_file} \\ + ${header_file} \\ + ${rename_chrs_file} \\ + --output ${prefix}.${extension} \\ + --threads ${task.cpus} \\ + ${input} + """ + + stub: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") ? "vcf" : "vcf" + def index_extension = args.contains("--write-index=tbi") || args.contains("-W=tbi") + ? "tbi" + : args.contains("--write-index=csi") || args.contains("-W=csi") + ? "csi" + : args.contains("--write-index") || args.contains("-W") ? "csi" : "" + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = extension.endsWith(".gz") && index_extension.matches("csi|tbi") ? "touch ${prefix}.${extension}.${index_extension}" : "" + + if ("${input}" == "${prefix}.${extension}") { + error("Input and output names are the same, set prefix in module configuration to disambiguate!") + } + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bcftools/annotate/meta.yml b/modules/nf-core/bcftools/annotate/meta.yml new file mode 100644 index 00000000..86331661 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/meta.yml @@ -0,0 +1,112 @@ +name: bcftools_annotate +description: Add or remove annotations. +keywords: + - bcftools + - annotate + - vcf + - remove + - add +tools: + - annotate: + description: Add or remove annotations. 
+ homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: https://samtools.github.io/bcftools/bcftools.html#annotate + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - input: + type: file + description: Query VCF or BCF file, can be either uncompressed or compressed + ontologies: [] + - index: + type: file + description: Index of the query VCF or BCF file + ontologies: [] + - annotations: + type: file + description: Bgzip-compressed file with annotations + ontologies: [] + - annotations_index: + type: file + description: Index of the annotations file + ontologies: [] + - columns: + type: file + description: List of columns in the annotations file, one name per row + ontologies: [] + - header_lines: + type: file + description: Contains lines to append to the output VCF header + ontologies: [] + - rename_chrs: + type: file + description: Rename chromosomes according to this file containing "old_name new_name\n" + pairs separated by whitespaces, each on a separate line. + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}": + type: file + description: Compressed annotated VCF file + pattern: "*{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "${prefix}.${extension}.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - "${prefix}.${extension}.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@projectoriented" + - "@ramprasadn" +maintainers: + - "@projectoriented" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test b/modules/nf-core/bcftools/annotate/tests/main.nf.test new file mode 100644 index 00000000..3e1d2573 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test @@ -0,0 +1,429 @@ +nextflow_process { + + name "Test Process BCFTOOLS_ANNOTATE" + script "../main.nf" + config "./nextflow.config" + process "BCFTOOLS_ANNOTATE" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/annotate" + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type z" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + 
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], [], [], [] - vcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type z" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index") { + + when { + params { + args_modules = "--output-type z --write-index --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi") { + + when { + params { + args_modules = "--output-type z --write-index=csi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi") { + + when { + params { + args_modules = "--output-type z --write-index=tbi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name + ":variantsMD5," + path(it).vcf.variantsMD5 }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, [] - bcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type u" + } + process { + """ + header = channel.of( + '##INFO=', + '##INFO=' + ).collectFile(name:"headers.vcf", newLine:true) + input[0] = channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [] + ]) + .combine(header) + .combine(channel.of([[]])) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], columns, [], [] - bcf_output") { + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type u" + } + process { + """ + columns = channel.of('INFO/ICB', 'INFO/HOB', 'INFO/DP4').collectFile(name:"columns.txt", newLine:true) + input[0] = channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ]).combine(columns) + .combine(channel.of([[], []])) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, rename_chrs - vcf_gz_index") { + + when { + params { + args_modules = "--output-type z --write-index --no-version" + } + process { + """ + headers = channel.of( + '##INFO=', + '##INFO=' + ).collectFile(name:"headers.vcf", newLine:true) + rename = channel.of('MT192765.1 renamed').collectFile(name:"rename.txt", newLine:true) + input[0] = channel.of([ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [], + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [] + ]).combine(headers) + .combine(rename) + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - stub") { + + options "-stub" + + when { + params { + args_modules = "-x ID,INFO/DP,FORMAT/DP --output-type z" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index - stub") { + + options "-stub" + + when { + params { + args_modules = "--output-type z --write-index --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi - stub") { + + options "-stub" + + when { + params { + args_modules = "--output-type z --write-index=csi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi - stub") { + + options "-stub" + + when { + params { + args_modules = "--output-type z --write-index=tbi --no-version" + } + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true), + [], [], [] + ] + """ + } + } + + then { + assert process.success + assertAll( + { assert snapshot( + process.out.vcf.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.tbi.collect { it.collect { it instanceof Map ? it : file(it).name }}, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name }}, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } +} diff --git a/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap new file mode 100644 index 00000000..10af196a --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/main.nf.test.snap @@ -0,0 +1,440 @@ +{ + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:19.618749659" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,bc7bf3ee9e8430e064c539eb81e59bf9" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.tbi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:43.350060834" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,bc7bf3ee9e8430e064c539eb81e59bf9" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:36.101003418" + }, + "sarscov2 - [vcf, 
tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_tbi - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.tbi" + ] + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:34.19449127" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index_csi - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:26.927815399" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,bc7bf3ee9e8430e064c539eb81e59bf9" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:28.891823681" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - stub": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:12.400301681" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, 
[] - bcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.bcf" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:50.375384421" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], [], [], [] - vcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,250b64289ab9d48f76359d01699fdf7d" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:21.320211288" + }, + "sarscov2 - [vcf, tbi, annotation, annotation_tbi], [], [], [] - vcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz:variantsMD5,250b64289ab9d48f76359d01699fdf7d" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:13.274072987" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], [], header, rename_chrs - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz" + ] + ], + [ + + ], + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.vcf.gz.csi" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:29:05.094685409" + }, + "sarscov2 - [vcf, [], annotation, annotation_tbi], columns, [], [] - bcf_output": { + "content": [ + [ + [ + { + "id": "test", + "single_end": false + }, + "test_ann.bcf" + ] + ], + [ + + ], + [ + + ], + { + 
"versions_bcftools": [ + [ + "BCFTOOLS_ANNOTATE", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.3" + }, + "timestamp": "2026-02-27T15:28:57.906382655" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/annotate/tests/nextflow.config b/modules/nf-core/bcftools/annotate/tests/nextflow.config new file mode 100644 index 00000000..10235100 --- /dev/null +++ b/modules/nf-core/bcftools/annotate/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = "${params.args_modules}" + ext.prefix = { "${meta.id}_ann" } +} diff --git a/modules/nf-core/bcftools/isec/bcftools-isec.diff b/modules/nf-core/bcftools/isec/bcftools-isec.diff new file mode 100644 index 00000000..bea9e9d9 --- /dev/null +++ b/modules/nf-core/bcftools/isec/bcftools-isec.diff @@ -0,0 +1,46 @@ +Changes in component 'nf-core/bcftools/isec' +'modules/nf-core/bcftools/isec/meta.yml' is unchanged +Changes in 'bcftools/isec/main.nf': +--- modules/nf-core/bcftools/isec/main.nf ++++ modules/nf-core/bcftools/isec/main.nf +@@ -12,6 +12,15 @@ + + output: + tuple val(meta), path("${prefix}", type: "dir"), emit: results ++ tuple val(meta), path("${prefix}/0002.vcf.gz"), emit: deepvar_consensus_vcf ++ tuple val(meta), path("${prefix}/0002.vcf.gz.tbi"), emit: deepvar_consensus_tbi ++ tuple val(meta), path("${prefix}/0003.vcf.gz"), emit: clair_consensus_vcf ++ tuple val(meta), path("${prefix}/0003.vcf.gz.tbi"), emit: clair_consensus_tbi ++ tuple val(meta), path("${prefix}/0001.vcf.gz"), emit: clair_private_vcf ++ tuple val(meta), path("${prefix}/0001.vcf.gz.tbi"), emit: clair_private_tbi ++ tuple val(meta), path("${prefix}/0000.vcf.gz"), emit: deepvar_private_vcf ++ tuple val(meta), path("${prefix}/0000.vcf.gz.tbi"), emit: deepvar_private_tbi ++ + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: +@@ -30,7 +39,7 @@ + ${targets_file_args} \\ + 
${regions_file_args} \\ + -p ${prefix} \\ +- ${vcf_files} \\ ++ ${vcf_files} + """ + + stub: +@@ -43,5 +52,9 @@ + touch ${prefix}/0000.vcf.gz.tbi + echo "" | gzip > ${prefix}/0001.vcf.gz + touch ${prefix}/0001.vcf.gz.tbi ++ echo "" | gzip > ${prefix}/0002.vcf.gz ++ touch ${prefix}/0002.vcf.gz.tbi ++ echo "" | gzip > ${prefix}/0003.vcf.gz ++ touch ${prefix}/0003.vcf.gz.tbi + """ + } + +'modules/nf-core/bcftools/isec/environment.yml' is unchanged +'modules/nf-core/bcftools/isec/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/isec/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/isec/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/isec/environment.yml b/modules/nf-core/bcftools/isec/environment.yml new file mode 100644 index 00000000..cb55500b --- /dev/null +++ b/modules/nf-core/bcftools/isec/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::bcftools=1.22 + - bioconda::htslib=1.22.1 diff --git a/modules/nf-core/bcftools/isec/main.nf b/modules/nf-core/bcftools/isec/main.nf new file mode 100644 index 00000000..cda1a662 --- /dev/null +++ b/modules/nf-core/bcftools/isec/main.nf @@ -0,0 +1,60 @@ +process BCFTOOLS_ISEC { + tag "${meta.id}" + label 'process_medium' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 
'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcfs), path(tbis), path(file_list), path(targets_file), path(regions_file) + + output: + tuple val(meta), path("${prefix}", type: "dir"), emit: results + tuple val(meta), path("${prefix}/0002.vcf.gz"), emit: deepvar_consensus_vcf + tuple val(meta), path("${prefix}/0002.vcf.gz.tbi"), emit: deepvar_consensus_tbi + tuple val(meta), path("${prefix}/0003.vcf.gz"), emit: clair_consensus_vcf + tuple val(meta), path("${prefix}/0003.vcf.gz.tbi"), emit: clair_consensus_tbi + tuple val(meta), path("${prefix}/0001.vcf.gz"), emit: clair_private_vcf + tuple val(meta), path("${prefix}/0001.vcf.gz.tbi"), emit: clair_private_tbi + tuple val(meta), path("${prefix}/0000.vcf.gz"), emit: deepvar_private_vcf + tuple val(meta), path("${prefix}/0000.vcf.gz.tbi"), emit: deepvar_private_tbi + + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + targets_file_args = targets_file ? "-T ${targets_file}" : '' + regions_file_args = regions_file ? "-R ${regions_file}" : '' + vcf_files = file_list ? 
"-l ${file_list}" : "${vcfs}" + + """ + bcftools isec \\ + ${args} \\ + ${targets_file_args} \\ + ${regions_file_args} \\ + -p ${prefix} \\ + ${vcf_files} + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + """ + mkdir ${prefix} + touch ${prefix}/README.txt + touch ${prefix}/sites.txt + echo "" | gzip > ${prefix}/0000.vcf.gz + touch ${prefix}/0000.vcf.gz.tbi + echo "" | gzip > ${prefix}/0001.vcf.gz + touch ${prefix}/0001.vcf.gz.tbi + echo "" | gzip > ${prefix}/0002.vcf.gz + touch ${prefix}/0002.vcf.gz.tbi + echo "" | gzip > ${prefix}/0003.vcf.gz + touch ${prefix}/0003.vcf.gz.tbi + """ +} diff --git a/modules/nf-core/bcftools/isec/meta.yml b/modules/nf-core/bcftools/isec/meta.yml new file mode 100644 index 00000000..051e141e --- /dev/null +++ b/modules/nf-core/bcftools/isec/meta.yml @@ -0,0 +1,101 @@ +name: bcftools_isec +description: Apply set operations to VCF files +keywords: + - variant calling + - intersect + - union + - complement + - VCF + - BCF +tools: + - isec: + description: | + Computes intersections, unions and complements of VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcfs: + type: list + description: | + List containing 2 or more vcf/bcf files. These must be compressed and have an associated index. + e.g. 
[ 'file1.vcf.gz', 'file2.vcf.gz' ] + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - edam: "http://edamontology.org/format_3570" # BCF + - tbis: + type: list + description: | + List containing the tbi index files corresponding to the vcf/bcf input files + pattern: "*.tbi" + ontologies: + - edam: "http://edamontology.org/format_3475" # Tabix index + - file_list: + type: file + description: | + Optional text file containing the list of VCF/BCF files to be processed by bcftools isec, one per line. + ontologies: + - edam: "http://edamontology.org/format_2330" # Text file + - targets_file: + type: file + description: | + Optional file containing target regions to restrict the analysis to. + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - edam: "http://edamontology.org/format_3475" # Tab-separated + - regions_file: + type: file + description: | + Optional file containing regions to restrict the analysis to. + ontologies: + - edam: "http://edamontology.org/format_3003" # BED + - edam: "http://edamontology.org/format_3475" # Tab-separated +output: + results: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g.
[ id:'test', single_end:false ] + - ${prefix}: + type: directory + description: Directory containing the output files from bcftools isec + pattern: "${prefix}/" + ontologies: + - edam: "http://edamontology.org/format_3016" # VCF + - edam: "http://edamontology.org/format_3570" # BCF + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - bcftools --version | sed '1!d; s/^.*bcftools //': + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - bcftools --version | sed '1!d; s/^.*bcftools //': + type: string + description: The command used to generate the version of the tool +authors: + - "@joseespinosa" + - "@drpatelh" +maintainers: + - "@joseespinosa" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/isec/tests/main.nf.test b/modules/nf-core/bcftools/isec/tests/main.nf.test new file mode 100644 index 00000000..d0a1f751 --- /dev/null +++ b/modules/nf-core/bcftools/isec/tests/main.nf.test @@ -0,0 +1,318 @@ +nextflow_process { + + name "Test Process BCFTOOLS_ISEC" + script "../main.nf" + process "BCFTOOLS_ISEC" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/isec" + + config "./nextflow.config" + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]]") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" + } + ).match() + } + ) + } + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - targets") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + 
[], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)], + [] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" && it.getFileName().toString() != "sites.txt" + } + ).match() + } + ) + } + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - targets - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)], + [] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - regions") { + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 
'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" && it.getFileName().toString() != "sites.txt" + } + ).match() + } + ) + } + } + + test("sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - regions - stub") { + options "-stub" + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [], + [], + [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/bed/test2.bed', checkIfExists: true)] + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list") { + + setup{ + new File("${launchDir}/file_list.txt").text = """ + test.vcf.gz + test2.vcf.gz + """.stripIndent().trim() + } + + + when { + process { + """ + input[0] = 
[ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [file("${launchDir}/file_list.txt", checkIfExists: true)], + [], + [] + ] + """ + } + } + + then { + def results_dir = new File(process.out.results[0][1]) + def results_list = [] + results_dir.eachFileRecurse { file -> results_list << file.getName() } + assertAll( + { assert process.success }, + { assert snapshot( + process.out.findAll { key, val -> key.startsWith("versions") }, + results_list.sort(), + path("${process.out.results[0][1]}").list().findAll { + it.getFileName().toString() != "0000.vcf.gz.tbi" && it.getFileName().toString() != "0001.vcf.gz.tbi" + } + ).match() + } + ) + } + } + test("sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list - stub") { + options "-stub" + + setup{ + new File("${launchDir}/file_list.txt").text = """ + test.vcf.gz + test2.vcf.gz + """.stripIndent().trim() + } + + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz', checkIfExists: true) + ], + [ + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.vcf.gz.tbi', checkIfExists: true) + ], + [file("${launchDir}/file_list.txt", checkIfExists: true)], + [], + [] + ] + """ + } + } + + then { + assertAll( + { assert 
process.success }, + { assert snapshot(process.out).match() } + + ) + } + } +} diff --git a/modules/nf-core/bcftools/isec/tests/main.nf.test.snap b/modules/nf-core/bcftools/isec/tests/main.nf.test.snap new file mode 100644 index 00000000..8a2cb65f --- /dev/null +++ b/modules/nf-core/bcftools/isec/tests/main.nf.test.snap @@ -0,0 +1,348 @@ +{ + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]]": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + "0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,8e722884ffb75155212a3fc053918766", + "0001.vcf.gz:md5,b39a72f91458b94b346dd73690207649", + "README.txt:md5,10fc33b66522645600d44afbd41fb792", + "sites.txt:md5,1cea3fbde7f6d3c97f3d39036f9690df" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:48.711543241" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - targets": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + "0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,565cbbb0d930be20fc235604da695623", + "0001.vcf.gz:md5,d65e9e45a4c5f45873cb26b80c81b213", + "README.txt:md5,f4190b7943f8f12886ad57ecaedd0c43" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-28T11:48:46.533255686" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - regions - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + 
[ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-23T19:06:04.239620535" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - targets - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-23T18:58:08.73508502" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + 
"0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,8e722884ffb75155212a3fc053918766", + "0001.vcf.gz:md5,b39a72f91458b94b346dd73690207649", + "README.txt:md5,4426b6b26b177d85e150f06bd5138411", + "sites.txt:md5,1cea3fbde7f6d3c97f3d39036f9690df" + ] + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-27T12:52:50.066330847" + }, + "sarscov2 - [[vcf1, vcf2], [tbi1, tbi2]] - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:03:56.874977547" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - regions": { + "content": [ + { + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + }, + [ + "0000.vcf.gz", + "0000.vcf.gz.tbi", + "0001.vcf.gz", + "0001.vcf.gz.tbi", + "README.txt", + "sites.txt" + ], + [ + "0000.vcf.gz:md5,565cbbb0d930be20fc235604da695623", + "0001.vcf.gz:md5,d65e9e45a4c5f45873cb26b80c81b213", + "README.txt:md5,16eeab1b2463bab4d498a4dfdaa297fa" + ] + ], + "meta": { + "nf-test": "0.9.3", + 
"nextflow": "25.04.8" + }, + "timestamp": "2026-01-28T11:49:26.428693544" + }, + "sarscov2 - [[vcf1.gz, vcf2.gz], [tbi1, tbi2]] - file_list - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "1": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ], + "results": [ + [ + { + "id": "test" + }, + [ + "0000.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0000.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "0001.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940", + "0001.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e", + "README.txt:md5,d41d8cd98f00b204e9800998ecf8427e", + "sites.txt:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_ISEC", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.04.8" + }, + "timestamp": "2026-01-28T11:55:27.123701797" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/isec/tests/nextflow.config b/modules/nf-core/bcftools/isec/tests/nextflow.config new file mode 100644 index 00000000..ac887d6b --- /dev/null +++ b/modules/nf-core/bcftools/isec/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = '--nfiles +2 --output-type z --no-version' +} diff --git a/modules/nf-core/bcftools/merge/bcftools-merge.diff b/modules/nf-core/bcftools/merge/bcftools-merge.diff new file mode 100644 index 00000000..6a8812db --- /dev/null +++ b/modules/nf-core/bcftools/merge/bcftools-merge.diff @@ -0,0 +1,21 @@ +Changes in component 'nf-core/bcftools/merge' +'modules/nf-core/bcftools/merge/meta.yml' is unchanged +Changes in 'bcftools/merge/main.nf': +--- modules/nf-core/bcftools/merge/main.nf 
++++ modules/nf-core/bcftools/merge/main.nf +@@ -9,7 +9,8 @@ + + input: + tuple val(meta), path(vcfs), path(tbis), path(bed) +- tuple val(meta2), path(fasta), path(fai) ++ tuple val(meta2), path(fasta) ++ tuple val(meta3), path(fai) + + output: + tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: vcf + +'modules/nf-core/bcftools/merge/environment.yml' is unchanged +'modules/nf-core/bcftools/merge/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/merge/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/merge/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/merge/main.nf b/modules/nf-core/bcftools/merge/main.nf index f1acbd3d..f295c0e6 100644 --- a/modules/nf-core/bcftools/merge/main.nf +++ b/modules/nf-core/bcftools/merge/main.nf @@ -9,7 +9,8 @@ process BCFTOOLS_MERGE { input: tuple val(meta), path(vcfs), path(tbis), path(bed) - tuple val(meta2), path(fasta), path(fai) + tuple val(meta2), path(fasta) + tuple val(meta3), path(fai) output: tuple val(meta), path("*.{bcf,vcf}{,.gz}"), emit: vcf diff --git a/modules/nf-core/bcftools/norm/environment.yml b/modules/nf-core/bcftools/norm/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/norm/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf new file mode 100644 index 00000000..443c8bbb --- /dev/null +++ b/modules/nf-core/bcftools/norm/main.nf @@ -0,0 +1,71 @@ +process BCFTOOLS_NORM { + tag "${meta.id}" + label 'process_medium' + + conda 
"${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(tbi) + tuple val(meta2), path(fasta) + + output: + tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}"), emit: vcf + tuple val(meta), path("*.tbi"), emit: tbi, optional: true + tuple val(meta), path("*.csi"), emit: csi, optional: true + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? "vcf" + : "vcf.gz" + """ + bcftools norm \\ + --fasta-ref ${fasta} \\ + --output ${prefix}.${extension} \\ + ${args} \\ + --threads ${task.cpus} \\ + ${vcf} + """ + + stub: + def args = task.ext.args ?: '--output-type z' + def prefix = task.ext.prefix ?: "${meta.id}" + def extension = args.contains("--output-type b") || args.contains("-Ob") + ? "bcf.gz" + : args.contains("--output-type u") || args.contains("-Ou") + ? "bcf" + : args.contains("--output-type z") || args.contains("-Oz") + ? "vcf.gz" + : args.contains("--output-type v") || args.contains("-Ov") + ? 
"vcf" + : "vcf.gz" + def index = '' + if (extension in ['vcf.gz', 'bcf', 'bcf.gz']) { + if (['--write-index=tbi', '-W=tbi'].any { arg -> args.contains(arg) } && extension == 'vcf.gz') { + index = 'tbi' + } + else if (['--write-index=tbi', '-W=tbi', '--write-index=csi', '-W=csi', '--write-index', '-W'].any { arg -> args.contains(arg) }) { + index = 'csi' + } + } + def create_cmd = extension.endsWith(".gz") ? "echo '' | gzip >" : "touch" + def create_index = index ? "touch ${prefix}.${extension}.${index}" : "" + """ + ${create_cmd} ${prefix}.${extension} + ${create_index} + """ +} diff --git a/modules/nf-core/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml new file mode 100644 index 00000000..9feecac0 --- /dev/null +++ b/modules/nf-core/bcftools/norm/meta.yml @@ -0,0 +1,107 @@ +name: bcftools_norm +description: Normalize VCF file +keywords: + - normalize + - norm + - variant calling + - VCF +tools: + - norm: + description: | + Normalize VCF files. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be normalized + e.g. 'file1.vcf' + pattern: "*.{vcf,vcf.gz}" + ontologies: [] + - tbi: + type: file + description: | + An optional index of the VCF file (for when the VCF is compressed) + pattern: "*.vcf.gz.tbi" + ontologies: [] + - - meta2: + type: map + description: | + Groovy Map containing reference information + e.g. [ id:'genome' ] + - fasta: + type: file + description: FASTA reference file + pattern: "*.{fasta,fa}" + ontologies: [] +output: + vcf: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.{vcf,vcf.gz,bcf,bcf.gz}": + type: file + description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed + BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file + pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}" + ontologies: [] + tbi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.tbi": + type: file + description: Alternative VCF file index + pattern: "*.tbi" + ontologies: [] + csi: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - "*.csi": + type: file + description: Default VCF file index + pattern: "*.csi" + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@abhi18av" + - "@ramprasadn" +maintainers: + - "@abhi18av" + - "@ramprasadn" diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test b/modules/nf-core/bcftools/norm/tests/main.nf.test new file mode 100644 index 00000000..05851753 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/main.nf.test @@ -0,0 +1,545 @@ +nextflow_process { + + name "Test Process BCFTOOLS_NORM" + script "../main.nf" + process "BCFTOOLS_NORM" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/norm" + + test("sarscov2 - [ vcf, [] ], fasta") { + + config "./nextflow.config" + + when { + process 
{ + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index") { + + config "./vcf_gz_index.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi") { + + config "./vcf_gz_index_csi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? 
it : file(it).name } } + ).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi") { + + config "./vcf_gz_index_tbi.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } } + ).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta") { + + config "./nextflow.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 
'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.vcf, + process.out.csi.collect { it.collect { it instanceof Map ? it : file(it).name } }, + process.out.tbi.collect { it.collect { it instanceof Map ? 
it : file(it).name } }, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output") { + + config "./nextflow.bcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output") { + + config "./nextflow.bcf_gz.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { 
assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta -stub") { + + config "./nextflow.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf output -stub") { + + config "./nextflow.vcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub") { + + config "./nextflow.vcf.config" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + 
""" + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf output - stub") { + + config "./nextflow.bcf.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub") { + + config "./nextflow.bcf_gz.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub") { + + config "./vcf_gz_index.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] 
+ """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub") { + + config "./vcf_gz_index_csi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.csi[0][1].endsWith(".csi") } + ) + } + } + + test("sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub") { + + config "./vcf_gz_index_tbi.config" + options "-stub" + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + [] + ] + input[1] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + { assert process.out.tbi[0][1].endsWith(".tbi") } + ) + } + } + + +} diff --git a/modules/nf-core/bcftools/norm/tests/main.nf.test.snap b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap new file mode 100644 index 00000000..ee2dadf7 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/main.nf.test.snap @@ -0,0 +1,876 @@ +{ + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", 
+ "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:29.987030961" + }, + "sarscov2 - [ vcf, [] ], fasta - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:06.488086505" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:34.863776359" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + 
"test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:54.718705045" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf_gz output": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ], + [ + + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:43.007377633" + }, + "sarscov2 - [ vcf, [] ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:07:54.877084219" + }, + "sarscov2 - [ vcf, tbi ], fasta - vcf output -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:22.220435939" 
+ }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,bf88706ef69c44ca9e287bc953ba3593" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,bf88706ef69c44ca9e287bc953ba3593" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:58.483532889" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:10:03.22576704" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T13:53:28.356741947" + }, + "sarscov2 - [ vcf, tbi ], fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + 
"tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:27.281315407" + }, + "sarscov2 - [ vcf, tbi ], fasta -stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:14.249715835" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf_gz output - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:46.665932019" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T07:52:58.381931979" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output - stub": { + "content": [ + { + "0": [ + [ + { + 
"id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:09:38.144449162" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_tbi - stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "1": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.tbi:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "vcf": [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:10:10.602984345" + }, + "sarscov2 - [ vcf, [] ], fasta - vcf_gz_index_csi": { + "content": [ + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz:md5,63e5adbaf3dd94550e9e3d7935dd28db" + ] + ], + [ + [ + { + "id": "test" + }, + "test_vcf.vcf.gz.csi" + ] + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "24.04.2" + }, + "timestamp": "2024-06-05T13:53:09.808834237" + }, + "sarscov2 - [ vcf, tbi ], fasta - bcf output": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "1": [ + + ], + "2": [ + + ], + "3": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ], + "csi": [ + + ], + "tbi": [ + + ], + 
"vcf": [ + [ + { + "id": "test" + }, + "test_norm.bcf.gz:md5,638c3c25bdd495c90ecbccb69ee77f07" + ] + ], + "versions_bcftools": [ + [ + "BCFTOOLS_NORM", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:08:51.053195842" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config new file mode 100644 index 00000000..b79af868 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type b --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config new file mode 100644 index 00000000..f36f397c --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.bcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type u --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.config b/modules/nf-core/bcftools/norm/tests/nextflow.config new file mode 100644 index 00000000..510803b4 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config new file mode 100644 index 00000000..10bf93e3 --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/nextflow.vcf.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type v --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config new file mode 100644 index 00000000..b31dd2de --- /dev/null +++ 
b/modules/nf-core/bcftools/norm/tests/nextflow.vcf_gz.config @@ -0,0 +1,4 @@ +process { + ext.args = '-m -any --output-type z --no-version' + ext.prefix = "test_norm" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config new file mode 100644 index 00000000..7dd696ee --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config new file mode 100644 index 00000000..aebffb6f --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_csi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=csi --no-version" +} diff --git a/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config new file mode 100644 index 00000000..b192ae7d --- /dev/null +++ b/modules/nf-core/bcftools/norm/tests/vcf_gz_index_tbi.config @@ -0,0 +1,4 @@ +process { + ext.prefix = { "${meta.id}_vcf" } + ext.args = "--output-type z --write-index=tbi --no-version" +} diff --git a/modules/nf-core/bcftools/query/bcftools-query.diff b/modules/nf-core/bcftools/query/bcftools-query.diff new file mode 100644 index 00000000..790c3808 --- /dev/null +++ b/modules/nf-core/bcftools/query/bcftools-query.diff @@ -0,0 +1,40 @@ +Changes in component 'nf-core/bcftools/query' +'modules/nf-core/bcftools/query/meta.yml' is unchanged +Changes in 'bcftools/query/main.nf': +--- modules/nf-core/bcftools/query/main.nf ++++ modules/nf-core/bcftools/query/main.nf +@@ -14,7 +14,8 @@ + path samples + + output: +- tuple val(meta), path("*.${suffix}"), emit: output ++ tuple val(meta), path("*.${suffix}.gz"), emit: output ++ tuple val(meta), 
path("*.${suffix}.gz.tbi"), emit: index + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: +@@ -35,12 +36,17 @@ + ${args} \\ + ${vcf} \\ + > ${prefix}.${suffix} ++ bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz ++ tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ +- touch ${prefix}.${suffix} \\ ++ touch ${prefix}.${suffix} ++ bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz ++ touch ${prefix}.${suffix}.gz.tbi ++ tabix -f -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz + """ + } + +'modules/nf-core/bcftools/query/environment.yml' is unchanged +'modules/nf-core/bcftools/query/tests/main.nf.test' is unchanged +'modules/nf-core/bcftools/query/tests/main.nf.test.snap' is unchanged +'modules/nf-core/bcftools/query/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/bcftools/query/environment.yml b/modules/nf-core/bcftools/query/environment.yml new file mode 100644 index 00000000..ba863b38 --- /dev/null +++ b/modules/nf-core/bcftools/query/environment.yml @@ -0,0 +1,10 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +channels: + - conda-forge + - bioconda +dependencies: + # renovate: datasource=conda depName=bioconda/htslib + - bioconda::htslib=1.22.1 + # renovate: datasource=conda depName=bioconda/bcftools + - bioconda::bcftools=1.22 diff --git a/modules/nf-core/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf new file mode 100644 index 00000000..4d2da568 --- /dev/null +++ b/modules/nf-core/bcftools/query/main.nf @@ -0,0 +1,52 @@ +process BCFTOOLS_QUERY { + tag "${meta.id}" + label 'process_single' + + conda "${moduleDir}/environment.yml" + container "${workflow.containerEngine == 'singularity' && 
!task.ext.singularity_pull_docker_container + ? 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/47/474a5ea8dc03366b04df884d89aeacc4f8e6d1ad92266888e7a8e7958d07cde8/data' + : 'community.wave.seqera.io/library/bcftools_htslib:0a3fa2654b52006f'}" + + input: + tuple val(meta), path(vcf), path(tbi) + path regions + path targets + path samples + + output: + tuple val(meta), path("*.${suffix}.gz"), emit: output + tuple val(meta), path("*.${suffix}.gz.tbi"), emit: index + tuple val("${task.process}"), val('bcftools'), eval("bcftools --version | sed '1!d; s/^.*bcftools //'"), topic: versions, emit: versions_bcftools + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + def regions_file = regions ? "--regions-file ${regions}" : "" + def targets_file = targets ? "--targets-file ${targets}" : "" + def samples_file = samples ? "--samples-file ${samples}" : "" + """ + bcftools query \\ + ${regions_file} \\ + ${targets_file} \\ + ${samples_file} \\ + ${args} \\ + ${vcf} \\ + > ${prefix}.${suffix} + bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz + tabix -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz + """ + + stub: + def prefix = task.ext.prefix ?: "${meta.id}" + suffix = task.ext.suffix ?: "txt" + """ + touch ${prefix}.${suffix} + bgzip -c ${prefix}.${suffix} > ${prefix}.${suffix}.gz + touch ${prefix}.${suffix}.gz.tbi + tabix -f -s 1 -b 2 -e 2 ${prefix}.${suffix}.gz + """ +} diff --git a/modules/nf-core/bcftools/query/meta.yml b/modules/nf-core/bcftools/query/meta.yml new file mode 100644 index 00000000..6bcb5e57 --- /dev/null +++ b/modules/nf-core/bcftools/query/meta.yml @@ -0,0 +1,89 @@ +name: bcftools_query +description: Extracts fields from VCF or BCF files and outputs them in user-defined + format. 
+keywords: + - query + - variant calling + - bcftools + - VCF +tools: + - query: + description: | + Extracts fields from VCF or BCF files and outputs them in user-defined format. + homepage: http://samtools.github.io/bcftools/bcftools.html + documentation: http://www.htslib.org/doc/bcftools.html + doi: 10.1093/bioinformatics/btp352 + licence: ["MIT"] + identifier: biotools:bcftools +input: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. [ id:'test', single_end:false ] + - vcf: + type: file + description: | + The vcf file to be queried. + pattern: "*.{vcf.gz, vcf}" + ontologies: [] + - tbi: + type: file + description: | + The tab index for the VCF file to be inspected. + pattern: "*.tbi" + ontologies: [] + - regions: + type: file + description: | + Optionally, restrict the operation to regions listed in this file. + ontologies: [] + - targets: + type: file + description: | + Optionally, restrict the operation to regions listed in this file (doesn't rely upon index files) + ontologies: [] + - samples: + type: file + description: | + Optional, file of sample names to be included or excluded. + e.g. 'file.tsv' + ontologies: [] +output: + output: + - - meta: + type: map + description: | + Groovy Map containing sample information + e.g. 
[ id:'test', single_end:false ] + - "*.${suffix}": + type: file + description: BCFTools query output file + ontologies: [] + versions_bcftools: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +topics: + versions: + - - ${task.process}: + type: string + description: The process the versions were collected from + - bcftools: + type: string + description: The tool name + - "bcftools --version | sed '1!d; s/^.*bcftools //'": + type: string + description: The command used to generate the version of the tool +authors: + - "@abhi18av" + - "@drpatelh" +maintainers: + - "@abhi18av" + - "@drpatelh" diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test b/modules/nf-core/bcftools/query/tests/main.nf.test new file mode 100644 index 00000000..63ac5af8 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test @@ -0,0 +1,97 @@ +nextflow_process { + + name "Test Process BCFTOOLS_QUERY" + script "../main.nf" + process "BCFTOOLS_QUERY" + + tag "modules" + tag "modules_nfcore" + tag "bcftools" + tag "bcftools/query" + + config "./nextflow.config" + + test("sarscov2 - [vcf, tbi], [], [], []") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], vcf, tsv, []") { + + when { + process { + """ + input[0] 
= [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test3.vcf.gz', checkIfExists: true) + input[2] = file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test2.targets.tsv.gz', checkIfExists: true) + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + process.out.output, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } + + test("sarscov2 - [vcf, tbi], [], [], [] - stub") { + + when { + process { + """ + input[0] = [ + [ id:'out' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz', checkIfExists: true), + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/vcf/test.vcf.gz.tbi', checkIfExists: true) + ] + input[1] = [] + input[2] = [] + input[3] = [] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot( + file(process.out.output[0][1]).name, + process.out.findAll { key, val -> key.startsWith("versions") } + ).match() } + ) + } + } +} diff --git a/modules/nf-core/bcftools/query/tests/main.nf.test.snap b/modules/nf-core/bcftools/query/tests/main.nf.test.snap new file mode 100644 index 00000000..5168ef3f --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/main.nf.test.snap @@ -0,0 +1,73 @@ +{ + "sarscov2 - [vcf, tbi], vcf, tsv, []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,75a6bd0084e2e1838cf7baba11b99d19" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:16:54.523612853" + }, + "sarscov2 - [vcf, 
tbi], [], [], [] - stub": { + "content": [ + "out.txt", + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:17:00.64798632" + }, + "sarscov2 - [vcf, tbi], [], [], []": { + "content": [ + [ + [ + { + "id": "out" + }, + "out.txt:md5,87a2ab194e1ee3219b44e58429ec3307" + ] + ], + { + "versions_bcftools": [ + [ + "BCFTOOLS_QUERY", + "bcftools", + "1.22" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-20T12:16:47.953130141" + } +} \ No newline at end of file diff --git a/modules/nf-core/bcftools/query/tests/nextflow.config b/modules/nf-core/bcftools/query/tests/nextflow.config new file mode 100644 index 00000000..8547ec10 --- /dev/null +++ b/modules/nf-core/bcftools/query/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args = "-f '%CHROM %POS %REF %ALT[%SAMPLE=%GT]'" +} diff --git a/modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff b/modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff new file mode 100644 index 00000000..e4aa7b97 --- /dev/null +++ b/modules/nf-core/deepvariant/callvariants/deepvariant-callvariants.diff @@ -0,0 +1,18 @@ +Changes in component 'nf-core/deepvariant/callvariants' +'modules/nf-core/deepvariant/callvariants/meta.yml' is unchanged +Changes in 'deepvariant/callvariants/main.nf': +--- modules/nf-core/deepvariant/callvariants/main.nf ++++ modules/nf-core/deepvariant/callvariants/main.nf +@@ -2,6 +2,7 @@ + process DEEPVARIANT_CALLVARIANTS { + tag "$meta.id" + label 'process_high' ++ label "${params.use_gpu ? 
'process_gpu' : 'process_noaccel'}" + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" + +'modules/nf-core/deepvariant/callvariants/tests/main.nf.test' is unchanged +'modules/nf-core/deepvariant/callvariants/tests/main.nf.test.snap' is unchanged +'modules/nf-core/deepvariant/callvariants/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/deepvariant/callvariants/main.nf b/modules/nf-core/deepvariant/callvariants/main.nf index 2fc656ee..a8516403 100644 --- a/modules/nf-core/deepvariant/callvariants/main.nf +++ b/modules/nf-core/deepvariant/callvariants/main.nf @@ -1,10 +1,10 @@ process DEEPVARIANT_CALLVARIANTS { tag "$meta.id" - label 'process_high' + label "${params.use_gpu ? 'process_gpu_very_high' : 'process_very_high'}" //Conda is not supported at the moment - container "docker.io/google/deepvariant:1.9.0" + container params.use_gpu ? "docker.io/google/deepvariant:1.9.0-gpu" : "docker.io/google/deepvariant:1.9.0" input: tuple val(meta), path(make_examples_tfrecords) diff --git a/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff b/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff new file mode 100644 index 00000000..ee309ee6 --- /dev/null +++ b/modules/nf-core/deepvariant/makeexamples/deepvariant-makeexamples.diff @@ -0,0 +1,26 @@ +Changes in component 'nf-core/deepvariant/makeexamples' +'modules/nf-core/deepvariant/makeexamples/meta.yml' is unchanged +Changes in 'deepvariant/makeexamples/main.nf': +--- modules/nf-core/deepvariant/makeexamples/main.nf ++++ modules/nf-core/deepvariant/makeexamples/main.nf +@@ -1,6 +1,6 @@ + process DEEPVARIANT_MAKEEXAMPLES { + tag "$meta.id" +- label 'process_high' ++ label 'process_very_high' + + //Conda is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" +@@ -36,6 +36,7 @@ + --mode calling \\ + --ref "${fasta}" \\ + --reads "${input}" \\ ++ 
--sample_name ${prefix} \\ + --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ + --gvcf "./${prefix}.gvcf.tfrecord@${task.cpus}.gz" \\ + ${regions} \\ + +'modules/nf-core/deepvariant/makeexamples/tests/main.nf.test' is unchanged +'modules/nf-core/deepvariant/makeexamples/tests/main.nf.test.snap' is unchanged +'modules/nf-core/deepvariant/makeexamples/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/deepvariant/makeexamples/main.nf b/modules/nf-core/deepvariant/makeexamples/main.nf index 77d2f331..05bd5a93 100644 --- a/modules/nf-core/deepvariant/makeexamples/main.nf +++ b/modules/nf-core/deepvariant/makeexamples/main.nf @@ -1,6 +1,6 @@ process DEEPVARIANT_MAKEEXAMPLES { tag "$meta.id" - label 'process_high' + label 'process_very_high' //Conda is not supported at the moment container "docker.io/google/deepvariant:1.9.0" @@ -36,6 +36,7 @@ process DEEPVARIANT_MAKEEXAMPLES { --mode calling \\ --ref "${fasta}" \\ --reads "${input}" \\ + --sample_name ${prefix} \\ --examples "./${prefix}.examples.tfrecord@${task.cpus}.gz" \\ --gvcf "./${prefix}.gvcf.tfrecord@${task.cpus}.gz" \\ ${regions} \\ diff --git a/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff b/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff new file mode 100644 index 00000000..c46b2925 --- /dev/null +++ b/modules/nf-core/deepvariant/postprocessvariants/deepvariant-postprocessvariants.diff @@ -0,0 +1,26 @@ +Changes in component 'nf-core/deepvariant/postprocessvariants' +'modules/nf-core/deepvariant/postprocessvariants/meta.yml' is unchanged +Changes in 'deepvariant/postprocessvariants/main.nf': +--- modules/nf-core/deepvariant/postprocessvariants/main.nf ++++ modules/nf-core/deepvariant/postprocessvariants/main.nf +@@ -1,6 +1,6 @@ + process DEEPVARIANT_POSTPROCESSVARIANTS { + tag "$meta.id" +- label 'process_medium' ++ label 'process_high' + + //Conda 
is not supported at the moment + container "docker.io/google/deepvariant:1.9.0" +@@ -64,6 +64,7 @@ + --outfile "${prefix}.vcf.gz" \\ + --nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\ + --gvcf_outfile "${prefix}.g.vcf.gz" \\ ++ --sample_name ${prefix} \\ + ${regions} \\ + ${small_model_arg} \\ + --cpus $task.cpus + +'modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test' is unchanged +'modules/nf-core/deepvariant/postprocessvariants/tests/main.nf.test.snap' is unchanged +'modules/nf-core/deepvariant/postprocessvariants/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/deepvariant/postprocessvariants/main.nf b/modules/nf-core/deepvariant/postprocessvariants/main.nf index 0830f9ac..dd949901 100644 --- a/modules/nf-core/deepvariant/postprocessvariants/main.nf +++ b/modules/nf-core/deepvariant/postprocessvariants/main.nf @@ -1,6 +1,6 @@ process DEEPVARIANT_POSTPROCESSVARIANTS { tag "$meta.id" - label 'process_medium' + label 'process_high' //Conda is not supported at the moment container "docker.io/google/deepvariant:1.9.0" @@ -64,6 +64,7 @@ process DEEPVARIANT_POSTPROCESSVARIANTS { --outfile "${prefix}.vcf.gz" \\ --nonvariant_site_tfrecord_path "${gvcf_tfrecords_logical_name}" \\ --gvcf_outfile "${prefix}.g.vcf.gz" \\ + --sample_name ${prefix} \\ ${regions} \\ ${small_model_arg} \\ --cpus $task.cpus diff --git a/modules/nf-core/longphase/haplotag/environment.yml b/modules/nf-core/longphase/haplotag/environment.yml index 3a882a5d..f436bdae 100644 --- a/modules/nf-core/longphase/haplotag/environment.yml +++ b/modules/nf-core/longphase/haplotag/environment.yml @@ -5,5 +5,5 @@ channels: - bioconda dependencies: - - bioconda::htslib=1.22.1 - - bioconda::longphase=2.0 + - bioconda::htslib=1.23.1 + - bioconda::longphase=2.0.1 diff --git a/modules/nf-core/longphase/haplotag/longphase-haplotag.diff b/modules/nf-core/longphase/haplotag/longphase-haplotag.diff new file 
mode 100644 index 00000000..0d93067d --- /dev/null +++ b/modules/nf-core/longphase/haplotag/longphase-haplotag.diff @@ -0,0 +1,34 @@ +Changes in component 'nf-core/longphase/haplotag' +'modules/nf-core/longphase/haplotag/meta.yml' is unchanged +Changes in 'longphase/haplotag/main.nf': +--- modules/nf-core/longphase/haplotag/main.nf ++++ modules/nf-core/longphase/haplotag/main.nf +@@ -42,6 +42,11 @@ + if [ -f "${prefix}.out" ]; then + mv ${prefix}.out ${prefix}.log + fi ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') ++ END_VERSIONS + """ + + stub: +@@ -52,5 +57,10 @@ + """ + touch ${prefix}.${suffix} + ${log} ++ ++ cat <<-END_VERSIONS > versions.yml ++ "${task.process}": ++ longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') ++ END_VERSIONS + """ +-} ++} +'modules/nf-core/longphase/haplotag/environment.yml' is unchanged +'modules/nf-core/longphase/haplotag/tests/main.nf.test' is unchanged +'modules/nf-core/longphase/haplotag/tests/main.nf.test.snap' is unchanged +'modules/nf-core/longphase/haplotag/tests/nextflow.config' is unchanged +************************************************************ diff --git a/modules/nf-core/longphase/haplotag/main.nf b/modules/nf-core/longphase/haplotag/main.nf index a64eb7c8..d529554a 100644 --- a/modules/nf-core/longphase/haplotag/main.nf +++ b/modules/nf-core/longphase/haplotag/main.nf @@ -4,8 +4,8 @@ process LONGPHASE_HAPLOTAG { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b0/b0184a9a36d8612fbae38bbaad7b52f03b815ad17673740e107cf1f267a1f15d/data': - 'community.wave.seqera.io/library/htslib_longphase:3071e61356fc25a4' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/83fce1d397cf71705cc096fc0e0e52f7013bdd471ef68ee53ae765688e5c439c/data': + 'community.wave.seqera.io/library/longphase_samtools:8c61296cae7a5fc0' }" input: tuple val(meta), path(bam), path(bai), path(snps), path(svs), path(mods) @@ -16,7 +16,7 @@ process LONGPHASE_HAPLOTAG { output: tuple val(meta), path("*.{bam,cram}"), emit: bam tuple val(meta), path("*.log") , emit: log , optional: true - path "versions.yml" , emit: versions + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions when: task.ext.when == null || task.ext.when @@ -63,4 +63,4 @@ process LONGPHASE_HAPLOTAG { longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') END_VERSIONS """ -} +} \ No newline at end of file diff --git a/modules/nf-core/longphase/haplotag/meta.yml b/modules/nf-core/longphase/haplotag/meta.yml index 63368b31..5c3ad844 100644 --- a/modules/nf-core/longphase/haplotag/meta.yml +++ b/modules/nf-core/longphase/haplotag/meta.yml @@ -1,7 +1,7 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "longphase_haplotag" -description: LongPhase is an ultra-fast program for simultaneously co-phasing SNPs, - small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms. +description: LongPhase is an ultra-fast program for simultaneously co-phasing + SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio + platforms. 
keywords: - haplotag - long-read @@ -15,9 +15,9 @@ tools: documentation: "https://github.com/twolinin/longphase" tool_dev_url: "https://github.com/twolinin/longphase" doi: "10.1093/bioinformatics/btac058" - licence: ["GPL v3"] + licence: + - "GPL v3" identifier: "" - input: - - meta: type: map @@ -92,13 +92,27 @@ output: description: Log file pattern: "*.log" ontologies: [] + versions_longphase: + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool authors: - "@fellen31" maintainers: diff --git a/modules/nf-core/longphase/haplotag/tests/main.nf.test b/modules/nf-core/longphase/haplotag/tests/main.nf.test index fb5d9311..687f61e0 100644 --- a/modules/nf-core/longphase/haplotag/tests/main.nf.test +++ b/modules/nf-core/longphase/haplotag/tests/main.nf.test @@ -38,7 +38,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions")}, bam(process.out.bam.get(0).get(1), stringency: 'silent').getHeader(), bam(process.out.bam.get(0).get(1), stringency: 'silent').getReadsMD5(), ).match() } @@ -79,7 +79,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions")}, 
process.out.log, bam(process.out.bam.get(0).get(1), 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/genome.fasta', stringency: 'silent').getHeader()[2..5], bam(process.out.bam.get(0).get(1), 'https://github.com/nf-core/test-datasets/raw/modules/data/genomics/homo_sapiens/genome/genome.fasta', stringency: 'silent').getReadsMD5(), @@ -118,7 +118,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot( - process.out.versions, + process.out.findAll { key, val -> key.startsWith("versions")}, bam(process.out.bam.get(0).get(1), stringency: 'silent').getHeader(), bam(process.out.bam.get(0).get(1), stringency: 'silent').getReadsMD5(), ).match() } @@ -157,7 +157,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -194,7 +194,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } diff --git a/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap b/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap index 412fb8d0..23287721 100644 --- a/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap +++ b/modules/nf-core/longphase/haplotag/tests/main.nf.test.snap @@ -2,25 +2,6 @@ "[ bam, bai, snps, [], [] ], fasta, fai - log & cram -stub": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.cram:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "2": [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" - ], "bam": [ [ { @@ -37,55 +18,51 @@ "test.log:md5,d41d8cd98f00b204e9800998ecf8427e" ] ], - "versions": [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] ] } ], "meta": 
{ - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-22T11:47:07.246878607" + "timestamp": "2026-03-20T10:13:11.968590854" }, "[ bam, bai, snps, [], [] ], fasta, fai": { "content": [ - [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" - ], + { + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] + ] + }, [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:chr22\tLN:40001", "@RG\tID:test\tSM:test", "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", - "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0.1\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " ], "721264eb2824a3146b331f2532d10180" ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-22T11:46:50.605854269" + "timestamp": "2026-03-20T10:12:40.584213389" }, "[ bam, bai, snps, [], [] ], fasta, fai -stub": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.bam:md5,d41d8cd98f00b204e9800998ecf8427e" - ] - ], - "1": [ - - ], - "2": [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" - ], "bam": [ [ { @@ -97,43 +74,59 @@ "log": [ ], - "versions": [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-22T11:47:03.645623933" + 
"timestamp": "2026-03-20T10:13:04.628910585" }, "[ bam, bai, snps, svs, [] ], fasta, fai": { "content": [ - [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" - ], + { + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] + ] + }, [ "@HD\tVN:1.6\tSO:coordinate", "@SQ\tSN:chr22\tLN:40001", "@RG\tID:test\tSM:test", "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", - "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam --sv-file NA24385_sv.vcf.gz " + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0.1\tCL:longphase haplotag --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam --sv-file NA24385_sv.vcf.gz " ], "721264eb2824a3146b331f2532d10180" ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-22T11:47:00.022853298" + "timestamp": "2026-03-20T10:12:57.997252428" }, "[ bam, bai, snps, [], [] ], fasta, fai - log & cram": { "content": [ - [ - "versions.yml:md5,2dfda84762159ad14ca71803b3139183" - ], + { + "versions_longphase": [ + [ + "LONGPHASE_HAPLOTAG", + "longphase", + "2.0.1 " + ] + ] + }, [ [ { @@ -146,14 +139,14 @@ "@RG\tID:test\tSM:test", "@PG\tID:minimap2\tPN:minimap2\tVN:2.28-r1209\tCL:minimap2 -y -x map-ont --secondary=no -R @RG\\tID:test\\tSM:test -t 30 -a genome.mmi test.bam_other.fastq.gz", "@PG\tID:samtools\tPN:samtools\tPP:minimap2\tVN:1.19.2\tCL:samtools sort -@ 29 -o test.bam_other.fastq.gz.bam --write-index", - "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0\tCL:longphase haplotag --log --cram --threads 2 -o test --reference 
genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " + "@PG\tID:longphase\tPN:longphase\tPP:samtools\tVN:2.0.1\tCL:longphase haplotag --log --cram --threads 2 -o test --reference genome.fasta --snp-file test.genome.vcf.gz --bam test.sorted.bam " ], "721264eb2824a3146b331f2532d10180" ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.6" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-10-22T11:46:55.93374635" + "timestamp": "2026-03-20T10:12:49.73950987" } } \ No newline at end of file diff --git a/modules/nf-core/longphase/phase/environment.yml b/modules/nf-core/longphase/phase/environment.yml index 3a882a5d..f436bdae 100644 --- a/modules/nf-core/longphase/phase/environment.yml +++ b/modules/nf-core/longphase/phase/environment.yml @@ -5,5 +5,5 @@ channels: - bioconda dependencies: - - bioconda::htslib=1.22.1 - - bioconda::longphase=2.0 + - bioconda::htslib=1.23.1 + - bioconda::longphase=2.0.1 diff --git a/modules/nf-core/longphase/phase/longphase-phase.diff b/modules/nf-core/longphase/phase/longphase-phase.diff index d8151f8c..b994f78f 100644 --- a/modules/nf-core/longphase/phase/longphase-phase.diff +++ b/modules/nf-core/longphase/phase/longphase-phase.diff @@ -3,31 +3,39 @@ Changes in component 'nf-core/longphase/phase' Changes in 'longphase/phase/main.nf': --- modules/nf-core/longphase/phase/main.nf +++ modules/nf-core/longphase/phase/main.nf -@@ -14,10 +14,13 @@ +@@ -14,11 +14,14 @@ output: - tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf - tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true - tuple val(meta), path("${prefix}_mod.vcf.gz"), emit: mod_vcf, optional: true -- path "versions.yml" , emit: versions + tuple val(meta), path("${prefix}.vcf.gz") , emit: snv_vcf + tuple val(meta), path("${prefix}.vcf.gz.tbi") , emit: snv_vcf_index + tuple val(meta), path("${prefix}_SV.vcf.gz") , emit: sv_vcf , optional: true + tuple val(meta), path("${prefix}_SV.vcf.gz.tbi") , emit: sv_vcf_index , 
optional: true + tuple val(meta), path("${prefix}_mod.vcf.gz") , emit: mod_vcf, optional: true + tuple val(meta), path("${prefix}_mod.vcf.gz.tbi"), emit: mod_vcf_index, optional: true -+ path "versions.yml" , emit: versions - + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions +- ++ when: task.ext.when == null || task.ext.when -@@ -45,10 +48,13 @@ - --threads $task.cpus \\ + +@@ -46,9 +49,20 @@ $args2 \\ ${prefix}*.vcf -+ -+ tabix -p vcf ${prefix}*.vcf.gz ++ tabix -p vcf ${prefix}.vcf.gz ++ ++ if [ -f ${prefix}_SV.vcf.gz ]; then ++ tabix -p vcf ${prefix}_SV.vcf.gz ++ fi ++ ++ if [ -f ${prefix}_mod.vcf.gz ]; then ++ tabix -p vcf ${prefix}_mod.vcf.gz ++ fi ++ cat <<-END_VERSIONS > versions.yml "${task.process}": longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') @@ -35,7 +43,12 @@ Changes in 'longphase/phase/main.nf': END_VERSIONS """ - +@@ -69,4 +83,4 @@ + longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') + END_VERSIONS + """ +-} ++} 'modules/nf-core/longphase/phase/environment.yml' is unchanged 'modules/nf-core/longphase/phase/tests/main.nf.test' is unchanged 'modules/nf-core/longphase/phase/tests/main.nf.test.snap' is unchanged diff --git a/modules/nf-core/longphase/phase/main.nf b/modules/nf-core/longphase/phase/main.nf index 3b942972..3bc0b715 100644 --- a/modules/nf-core/longphase/phase/main.nf +++ b/modules/nf-core/longphase/phase/main.nf @@ -4,8 +4,8 @@ process LONGPHASE_PHASE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b0/b0184a9a36d8612fbae38bbaad7b52f03b815ad17673740e107cf1f267a1f15d/data': - 'community.wave.seqera.io/library/htslib_longphase:3071e61356fc25a4' }" + 'https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/83/83fce1d397cf71705cc096fc0e0e52f7013bdd471ef68ee53ae765688e5c439c/data': + 'community.wave.seqera.io/library/longphase_samtools:8c61296cae7a5fc0' }" input: tuple val(meta), path(bam), path(bai), path(snvs), path(svs), path(mods) @@ -20,8 +20,8 @@ process LONGPHASE_PHASE { tuple val(meta), path("${prefix}_SV.vcf.gz.tbi") , emit: sv_vcf_index , optional: true tuple val(meta), path("${prefix}_mod.vcf.gz") , emit: mod_vcf, optional: true tuple val(meta), path("${prefix}_mod.vcf.gz.tbi"), emit: mod_vcf_index, optional: true - path "versions.yml" , emit: versions - + tuple val("${task.process}"), val("longphase"), eval("longphase --version | head -n 1 | sed 's/Version: //'"), emit: versions_longphase, topic: versions + when: task.ext.when == null || task.ext.when @@ -49,7 +49,15 @@ process LONGPHASE_PHASE { $args2 \\ ${prefix}*.vcf - tabix -p vcf ${prefix}*.vcf.gz + tabix -p vcf ${prefix}.vcf.gz + + if [ -f ${prefix}_SV.vcf.gz ]; then + tabix -p vcf ${prefix}_SV.vcf.gz + fi + + if [ -f ${prefix}_mod.vcf.gz ]; then + tabix -p vcf ${prefix}_mod.vcf.gz + fi cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -75,4 +83,4 @@ process LONGPHASE_PHASE { longphase: \$(longphase --version | head -n 1 | sed 's/Version: //') END_VERSIONS """ -} +} \ No newline at end of file diff --git a/modules/nf-core/longphase/phase/meta.yml b/modules/nf-core/longphase/phase/meta.yml index 94efc684..266b878b 100644 --- a/modules/nf-core/longphase/phase/meta.yml +++ b/modules/nf-core/longphase/phase/meta.yml @@ -1,7 +1,7 @@ -# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json name: "longphase_phase" -description: LongPhase is an ultra-fast 
program for simultaneously co-phasing SNPs, - small indels, large SVs, and (5mC) modifications for Nanopore and PacBio platforms. +description: LongPhase is an ultra-fast program for simultaneously co-phasing + SNPs, small indels, large SVs, and (5mC) modifications for Nanopore and PacBio + platforms. keywords: - phase - long-read @@ -15,9 +15,9 @@ tools: documentation: "https://github.com/twolinin/longphase" tool_dev_url: "https://github.com/twolinin/longphase" doi: "10.1093/bioinformatics/btac058" - licence: ["GPL v3"] + licence: + - "GPL v3" identifier: "" - input: - - meta: type: map @@ -76,43 +76,57 @@ output: description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - "${prefix}.vcf.gz": + - ${prefix}.vcf.gz: type: file description: Compressed VCF file with phased SNVs and indels pattern: "*.vcf.gz" ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format + - edam: http://edamontology.org/format_3989 sv_vcf: - - meta: type: map description: | Groovy Map containing sample information e.g. `[ id:'sample1', single_end:false ]` - - "${prefix}_SV.vcf.gz": + - ${prefix}_SV.vcf.gz: type: file description: Compressed VCF file with phased SVs pattern: "*_SV.vcf.gz" ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format + - edam: http://edamontology.org/format_3989 mod_vcf: - - meta: type: map description: | Groovy Map containing sample information e.g. 
`[ id:'sample1', single_end:false ]` - - "${prefix}_mod.vcf.gz": + - ${prefix}_mod.vcf.gz: type: file description: Compressed VCF file with phased modifications pattern: "*.vcf.gz" ontologies: - - edam: http://edamontology.org/format_3989 # GZIP format + - edam: http://edamontology.org/format_3989 + versions_longphase: + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool +topics: versions: - - versions.yml: - type: file - description: File containing software versions - pattern: "versions.yml" - ontologies: - - edam: http://edamontology.org/format_3750 # YAML + - - ${task.process}: + type: string + description: The name of the process + - longphase: + type: string + description: The name of the tool + - "longphase --version | head -n 1 | sed 's/Version: //'": + type: eval + description: The expression to obtain the version of the tool authors: - "@fellen31" maintainers: diff --git a/modules/nf-core/longphase/phase/tests/main.nf.test b/modules/nf-core/longphase/phase/tests/main.nf.test index b45bbf01..30c666ba 100644 --- a/modules/nf-core/longphase/phase/tests/main.nf.test +++ b/modules/nf-core/longphase/phase/tests/main.nf.test @@ -38,7 +38,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -72,7 +72,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -112,7 +112,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -148,7 +148,7 @@ nextflow_process { then { assertAll( 
{ assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } @@ -183,7 +183,7 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match() } + { assert snapshot(sanitizeOutput(process.out)).match() } ) } diff --git a/modules/nf-core/longphase/phase/tests/main.nf.test.snap b/modules/nf-core/longphase/phase/tests/main.nf.test.snap index b0cf7144..c6a155f9 100644 --- a/modules/nf-core/longphase/phase/tests/main.nf.test.snap +++ b/modules/nf-core/longphase/phase/tests/main.nf.test.snap @@ -2,28 +2,6 @@ "[ bam, bai, snps, svs, [] ], fasta, fai - stub": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test_SV.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" - ], "mod_vcf": [ ], @@ -43,37 +21,24 @@ "test_SV.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" ] ], - "versions": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.5" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-11-06T16:06:24.025191062" + "timestamp": "2026-03-20T10:13:50.746589174" }, "[ bam, bai, snps, [], [] ], fasta, fai": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,77d7ca7d16c841d3f552681abef984dc" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" - ], "mod_vcf": [ ], @@ -82,48 +47,30 @@ { "id": "test" }, - "test.vcf.gz:md5,77d7ca7d16c841d3f552681abef984dc" + "test.vcf.gz:md5,73d5f51aea92e09b3d427837066f114c" ] ], "sv_vcf": [ ], - "versions": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + 
"longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.5" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-11-06T16:05:57.029934447" + "timestamp": "2026-03-20T10:13:19.273322013" }, "[ bam, bai, snps, svs, [] ], fasta, fai": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,f26bc442f6a1645bcfaabf989ab9483c" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test_SV.vcf.gz:md5,e1b83c15a21bab57f2b228cc7c7d8be8" - ] - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" - ], "mod_vcf": [ ], @@ -132,7 +79,7 @@ { "id": "test" }, - "test.vcf.gz:md5,f26bc442f6a1645bcfaabf989ab9483c" + "test.vcf.gz:md5,af297491417a5727de21f893b553db37" ] ], "sv_vcf": [ @@ -140,45 +87,27 @@ { "id": "test" }, - "test_SV.vcf.gz:md5,e1b83c15a21bab57f2b228cc7c7d8be8" + "test_SV.vcf.gz:md5,4636e0ac86a86565e5d04b5d1b6a00e7" ] ], - "versions": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.5" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-11-06T16:06:03.319855838" + "timestamp": "2026-03-20T10:13:27.450110496" }, "[ bam x2, bai x2, snps, svs, [] ], fasta, fai": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,5333ba9fa14233d3fdbd8b9e1786b998" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "test_SV.vcf.gz:md5,434fd35ae3de2a9187e43932686bfd19" - ] - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" - ], "mod_vcf": [ ], @@ -187,7 +116,7 @@ { "id": "test" }, - "test.vcf.gz:md5,5333ba9fa14233d3fdbd8b9e1786b998" + "test.vcf.gz:md5,f688da3f046717765e879c061510e037" ] ], "sv_vcf": [ @@ -195,40 +124,27 @@ { "id": "test" }, - "test_SV.vcf.gz:md5,434fd35ae3de2a9187e43932686bfd19" + "test_SV.vcf.gz:md5,5336fc5eb9d3421cef66fd18320a4cb8" ] ], - "versions": [ - 
"versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.5" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-11-06T16:06:10.867281359" + "timestamp": "2026-03-20T10:13:36.797768748" }, "[ bam, bai, snps, [], [] ], fasta, fai - stub": { "content": [ { - "0": [ - [ - { - "id": "test" - }, - "test.vcf.gz:md5,68b329da9893e34099c7d8ad5cb9c940" - ] - ], - "1": [ - - ], - "2": [ - - ], - "3": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" - ], "mod_vcf": [ ], @@ -243,15 +159,19 @@ "sv_vcf": [ ], - "versions": [ - "versions.yml:md5,1bc54f97e2b06e354a655d1066245fb4" + "versions_longphase": [ + [ + "LONGPHASE_PHASE", + "longphase", + "2.0.1 " + ] ] } ], "meta": { - "nf-test": "0.9.2", - "nextflow": "25.04.5" + "nf-test": "0.9.3", + "nextflow": "25.10.4" }, - "timestamp": "2025-11-06T16:06:17.992733472" + "timestamp": "2026-03-20T10:13:44.86300696" } } \ No newline at end of file diff --git a/modules/nf-core/severus/main.nf b/modules/nf-core/severus/main.nf index f191fd0b..95fe7912 100644 --- a/modules/nf-core/severus/main.nf +++ b/modules/nf-core/severus/main.nf @@ -20,12 +20,12 @@ process SEVERUS { tuple val(meta), path("${prefix}/severus_collaped_dup.bed") , emit: collapsed_dup , optional: true tuple val(meta), path("${prefix}/severus_LOH.bed") , emit: loh , optional: true tuple val(meta), path("${prefix}/all_SVs/severus_all.vcf.gz") , emit: all_vcf , optional: true - tuple val(meta), path("${prefix}/all_SVs/breakpoints_clusters_list.tsv") , emit: all_breakpoints_clusters_list , optional: true - tuple val(meta), path("${prefix}/all_SVs/breakpoints_clusters.tsv") , emit: all_breakpoints_clusters , optional: true + tuple val(meta), path("${prefix}/all_SVs/breakpoint_clusters_list.tsv") , emit: all_breakpoints_clusters_list , optional: true + tuple val(meta), path("${prefix}/all_SVs/breakpoint_clusters.tsv") , 
emit: all_breakpoints_clusters , optional: true tuple val(meta), path("${prefix}/all_SVs/plots/severus_*.html") , emit: all_plots , optional: true tuple val(meta), path("${prefix}/somatic_SVs/severus_somatic.vcf.gz") , emit: somatic_vcf , optional: true - tuple val(meta), path("${prefix}/somatic_SVs/breakpoints_clusters_list.tsv"), emit: somatic_breakpoints_clusters_list, optional: true - tuple val(meta), path("${prefix}/somatic_SVs/breakpoints_clusters.tsv") , emit: somatic_breakpoints_clusters , optional: true + tuple val(meta), path("${prefix}/somatic_SVs/breakpoint_clusters_list.tsv"), emit: somatic_breakpoints_clusters_list, optional: true + tuple val(meta), path("${prefix}/somatic_SVs/breakpoint_clusters.tsv") , emit: somatic_breakpoints_clusters , optional: true tuple val(meta), path("${prefix}/somatic_SVs/plots/severus_*.html") , emit: somatic_plots , optional: true path "versions.yml" , emit: versions diff --git a/nextflow.config b/nextflow.config index 2dfa72e2..64cf5653 100644 --- a/nextflow.config +++ b/nextflow.config @@ -13,6 +13,18 @@ params { // Input options input = null + // Small variant calling options + germline_var_keep = ['deepvariant', 'clair'] + somatic_var_keep = ['deepsomatic', 'clair'] + germline_var_combine = 'all' + somatic_var_combine = 'all' + prioritize_caller_germline = 'deepvariant' + prioritize_caller_somatic = 'deepsomatic' + + // PON Options + pon_vcfs = null + pon_flags = null + // References genome = null igenomes_base = 's3://ngi-igenomes/igenomes/' @@ -38,6 +50,8 @@ params { skip_normalfiber = false skip_m6a = false skip_vep = false + skip_modcall = false + use_gpu = false skip_whatshapstats = false // minimap2 options diff --git a/nextflow_schema.json b/nextflow_schema.json index beb2b8c9..06a0603b 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -43,6 +43,70 @@ } } }, + "pon_options": { + "title": "Panel of Normals (PON) options", + "type": "object", + "description": "Options for panel of normals 
filtering", + "default": "", + "properties": { + "pon_vcfs": { + "type": "string", + "description": "Path to panel of normals VCF file(s) for somatic variant filtering" + }, + "pon_flags": { + "type": "string", + "description": "Additional flags to pass to the PON filtering step" + } + } + }, + "small_variant_calling_options": { + "title": "options for small variant calling", + "type": "object", + "properties": { + "germline_var_keep": { + "type": "array", + "description": "List of germline variant callers to use. Must include at least one of [deepvariant, clair].", + "items": { + "type": "string", + "enum": ["deepvariant", "clair"] + }, + "minItems": 1 + }, + "somatic_var_keep": { + "type": "array", + "description": "List of somatic variant callers to use. Must include at least one of [deepsomatic, clair].", + "items": { + "type": "string", + "enum": ["deepsomatic", "clair"] + }, + "minItems": 1 + }, + "germline_var_combine": { + "type": "string", + "description": "When two germline callers are used, specifies how to combine them. 'consensus' keeps only variants called by both callers; 'all' keeps all variants from both callers.", + "default": "all", + "enum": ["consensus", "all"] + }, + "somatic_var_combine": { + "type": "string", + "description": "When two somatic callers are used, specifies how to combine them. 'consensus' keeps only variants called by both callers; 'all' keeps all variants from both callers.", + "default": "all", + "enum": ["consensus", "all"] + }, + "prioritize_caller_germline": { + "type": "string", + "description": "When both germline callers are used, specifies which caller's format to use for variants called by both. Must be [deepvariant, clair].", + "default": "deepvariant", + "enum": ["deepvariant", "clair"] + }, + "prioritize_caller_somatic": { + "type": "string", + "description": "When both somatic callers are used, specifies which caller's format to use for variants called by both. 
Must be one of [deepsomatic, clair].", + "default": "deepsomatic", + "enum": ["deepsomatic", "clair"] + } + } + }, "reference_genome_options": { "title": "Reference genome options", "type": "object", @@ -58,6 +122,7 @@ }, "igenomes_ignore": { "type": "boolean", + "default": false, "description": "Do not load the iGenomes reference config.", "fa_icon": "fas fa-ban", "hidden": true, @@ -132,7 +197,9 @@ "default": 113 }, "download_vep_cache": { - "type": "boolean" + "type": "boolean", + "default": false, + "description": "Download the VEP cache if not already present" }, "vep_custom": { "type": "string" @@ -212,6 +279,7 @@ }, "ascat_pdf_plots": { "type": "boolean", + "default": false, "description": "Boolean for ASCAT production of pdf plots (entered as string)" } } @@ -235,49 +303,73 @@ "properties": { "skip_qc": { "type": "boolean", + "default": false, "description": "Skips all QC steps" }, "skip_cramino": { "type": "boolean", + "default": false, "description": "Skips Cramino" }, "skip_mosdepth": { "type": "boolean", + "default": false, "description": "Skips Mosdepth" }, "skip_bamstats": { "type": "boolean", + "default": false, "description": "Skips samtools flagstat, stats, and idxstats" }, "skip_wakhan": { "type": "boolean", + "default": false, "description": "Skips wakhan" }, "skip_fiber": { "type": "boolean", + "default": false, "description": "Skip Fibertools steps" }, "skip_ascat": { "type": "boolean", + "default": false, "description": "Skip ASCAT" }, "skip_m6a": { "type": "boolean", + "default": false, "description": "Skip m6a calling by Fibertools" }, "skip_vep": { - "type": "boolean" + "type": "boolean", + "default": false, + "description": "Skip VEP annotation" }, "skip_normalfiber": { - "type": "boolean" + "type": "boolean", + "default": false, + "description": "Skip Fibertools steps for the normal sample" }, "skip_nanoplot": { "type": "boolean", + "default": false, "description": "Skip Nanoplot" }, "skip_whatshapstats": { "type": "boolean", + "default": false, 
"description": "Skip WhatsHap stats" + }, + "skip_modcall": { + "type": "boolean", + "default": false, + "description": "Skip modification calling" + }, + "use_gpu": { + "type": "boolean", + "default": false, + "description": "Use GPU for supported tools (e.g. DeepVariant, DeepSomatic, Clair3)" } } }, @@ -338,6 +430,7 @@ "properties": { "version": { "type": "boolean", + "default": false, "description": "Display version and exit.", "fa_icon": "fas fa-question-circle", "hidden": true @@ -361,6 +454,7 @@ }, "plaintext_email": { "type": "boolean", + "default": false, "description": "Send plain-text email instead of HTML.", "fa_icon": "fas fa-remove-format", "hidden": true @@ -375,6 +469,7 @@ }, "monochrome_logs": { "type": "boolean", + "default": false, "description": "Do not use coloured log outputs.", "fa_icon": "fas fa-palette", "hidden": true @@ -425,15 +520,18 @@ "hidden": true }, "help": { - "type": ["boolean", "string"], + "type": "boolean", + "default": false, "description": "Display the help message." }, "help_full": { "type": "boolean", + "default": false, "description": "Display the full detailed help message." }, "show_hidden": { "type": "boolean", + "default": false, "description": "Display hidden parameters in the help message (only works when --help or --help_full are provided)." 
} } @@ -443,6 +541,12 @@ { "$ref": "#/$defs/input_output_options" }, + { + "$ref": "#/$defs/pon_options" + }, + { + "$ref": "#/$defs/small_variant_calling_options" + }, { "$ref": "#/$defs/reference_genome_options" }, diff --git a/ro-crate-metadata.json b/ro-crate-metadata.json index 35fb56bf..09c9961f 100644 --- a/ro-crate-metadata.json +++ b/ro-crate-metadata.json @@ -23,7 +23,7 @@ "@type": "Dataset", "creativeWorkStatus": "InProgress", "datePublished": "2025-12-23T12:58:53+00:00", - "description": "# IntGenomicsLab/lrsomatic\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/IntGenomicsLab/lrsomatic)\n[![GitHub Actions CI Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.17751829-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.17751829)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with 
docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/IntGenomicsLab/lrsomatic)\n\n## Introduction\n\n**IntGenomicsLab/lrsomatic** is a robust bioinformatics pipeline designed for processing and analyzing **somatic DNA sequencing** data for long-read sequencing technologies from **Oxford Nanopore** and **PacBio**. It supports both canonical base DNA and modified base calling, including specialized applications such as **Fiber-seq**.\n\nThis **end-to-end pipeline** handles the entire workflow \u2014 **from raw read processing and alignment, to comprehensive somatic variant calling**, including single nucleotide variants, indels, structural variants, copy number alterations, and modified bases.\n\nIt can be run in both **matched tumour-normal** and **tumour-only mode**, offering flexibility depending on the users study design.\n\nDeveloped using **Nextflow DSL2**, it offers high portability and scalability across diverse computing environments. By leveraging Docker or Singularity containers, installation is streamlined and results are highly reproducible. Each process runs in an isolated container, simplifying dependency management and updates. Where applicable, pipeline components are sourced from **nf-core/modules**, promoting reuse, interoperability, and consistency within the broader Nextflow and nf-core ecosystems.\n\n## Pipeline summary\n\n![image](./assets/lrsomatic_1.0.png)\n\n**1) Pre-processing:**\n\na. Raw read QC ([`cramino`](https://github.com/wdecoster/cramino))\n\nb. Alignment to the reference genome ([`minimap2`](https://github.com/lh3/minimap2))\n\nc. 
Post alignment QC ([`cramino`](https://github.com/wdecoster/cramino), [`samtools idxstats`](https://github.com/samtools/samtools), [`samtools flagstats`](https://github.com/samtools/samtools), [`samtools stats`](https://github.com/samtools/samtools))\n\nd. Specific for calling modified base calling ([`Modkit`](https://github.com/nanoporetech/modkit), [`Fibertools`](https://github.com/fiberseq/fibertools-rs))\n\n**2i) Matched mode: small variant calling:**\n\na. Calling Germline SNPs ([`Clair3`](https://github.com/HKU-BAL/Clair3))\n\nb. Phasing and Haplotagging the SNPs in the normal and tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\nc. Calling somatic SNVs ([`ClairS`](https://github.com/HKU-BAL/ClairS))\n\n**2ii) Tumour only mode: small variant calling:**\n\na. Calling Germline SNPs and somatic SNVs ([`ClairS-TO`](https://github.com/HKU-BAL/ClairS-TO))\n\nb. Phasing and Haplotagging germline SNPs in tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\n**3) Large variant calling:**\n\na. Somatic structural variant calling ([`Severus`](https://github.com/KolmogorovLab/Severus))\n\nb. Copy number alterion calling; long read version of ([`ASCAT`](https://github.com/VanLoo-lab/ascat))\n\n**4) Annotation:**\n\na. Small variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\nb. Structural variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. 
Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst prepare a samplesheet with your input data that looks as follows:\n\n```csv\nsample,bam_tumor,bam_normal,platform,sex,fiber\nsample1,tumour.bam,normal.bam,ont,female,n\nsample2,tumour.bam,,ont,female,y\nsample3,tumour.bam,,pb,male,n\nsample4,tumour.bam,normal.bam,pb,male,y\n```\n\nEach row represents a sample. The bam files should always be unaligned bam files. All fields except for `bam_normal` are required. If `bam_normal` is empty, the pipeline will run in tumour only mode. `platform` should be either `ont` or `pb` for Oxford Nanopore Sequencing or PacBio sequencing, respectively. `sex` refers to the biological sex of the sample and should be either `female` or `male`. Finally, `fiber` specifies whether your sample is Fiber-seq data or not and should have either `y` for Yes or `n` for No.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run IntGenomicsLab/lrsomatic \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\nMore detail is given in our [usage documentation](/docs/usage.md)\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nIntGenomicsLab/lr_somatic was originally written by Luuk Harbers, Robert Forsyth, Alexandra Pan\u010d\u00edkov\u00e1, Marios Eftychiou, Ruben Cools, Laurens Lambrechts, and Jonas Demeulemeester.\n\n## Pipeline output\n\nThis pipeline produces a series of different output files. The main output is an aligned and phased tumour bam file. This bam file can be used by any typical downstream tool that uses bam files as input. 
Furthermore, we have sample-specific QC outputs from `cramino` (fastq), `cramino` (bam), `mosdepth`, `samtools` (stats/flagstat/idxstats), and optionally `fibertools`. Finally, we have a `multiqc` report from that combines the output from `mosdepth` and `samtools` into one html report.\n\nBesides QC and the aligned and phased bam file, we have output from (structural) variant and copy number callers, of which some are optional. The output from these variant callers can be found in their respective folders. For small and structural variant callers (`clairS`, `clairS-TO`, and `severus`) these will contain, among others, `vcf` files with called variants. For `ascat` these contain files with final copy number information and plots of the copy number profiles.\n\nExample output directory structure:\n\n```\n\u251c\u2500\u2500 Sample 1\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500clairS-TO\n\u2502 \u2502 \u251c\u2500\u2500severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u2502\n\u251c\u2500\u2500 Sample 2\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u2502 \u251c\u2500\u2500 
normal\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500 clair3\n\u2502 \u2502 \u251c\u2500\u2500 clairS\n\u2502 \u2502 \u251c\u2500\u2500 severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u251c\u2500\u2500 pipeline_info\n```\n\nmore detail is given in our [output documentation](/docs/output.md)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use IntGenomicsLab/lrsomatic for your analysis, please cite it using the following doi: [10.5281/zenodo.17751829](https://doi.org/10.5281/zenodo.17751829)\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", + "description": "# IntGenomicsLab/lrsomatic\n\n[![Open in GitHub Codespaces](https://img.shields.io/badge/Open_In_GitHub_Codespaces-black?labelColor=grey&logo=github)](https://github.com/codespaces/new/IntGenomicsLab/lrsomatic)\n[![GitHub Actions CI Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/nf-test.yml)\n[![GitHub Actions Linting Status](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml/badge.svg)](https://github.com/IntGenomicsLab/lrsomatic/actions/workflows/linting.yml)[![Cite with Zenodo](http://img.shields.io/badge/DOI-10.5281/zenodo.17751829-1073c8?labelColor=000000)](https://doi.org/10.5281/zenodo.17751829)\n[![nf-test](https://img.shields.io/badge/unit_tests-nf--test-337ab7.svg)](https://www.nf-test.com)\n\n[![Nextflow](https://img.shields.io/badge/version-%E2%89%A525.04.0-green?style=flat&logo=nextflow&logoColor=white&color=%230DC09D&link=https%3A%2F%2Fnextflow.io)](https://www.nextflow.io/)\n[![nf-core template version](https://img.shields.io/badge/nf--core_template-3.5.1-green?style=flat&logo=nfcore&logoColor=white&color=%2324B064&link=https%3A%2F%2Fnf-co.re)](https://github.com/nf-core/tools/releases/tag/3.5.1)\n[![run with conda](http://img.shields.io/badge/run%20with-conda-3EB049?labelColor=000000&logo=anaconda)](https://docs.conda.io/en/latest/)\n[![run with docker](https://img.shields.io/badge/run%20with-docker-0db7ed?labelColor=000000&logo=docker)](https://www.docker.com/)\n[![run with singularity](https://img.shields.io/badge/run%20with-singularity-1d355c.svg?labelColor=000000)](https://sylabs.io/docs/)\n[![Launch on Seqera Platform](https://img.shields.io/badge/Launch%20%F0%9F%9A%80-Seqera%20Platform-%234256e7)](https://cloud.seqera.io/launch?pipeline=https://github.com/IntGenomicsLab/lrsomatic)\n\n## 
Introduction\n\n**IntGenomicsLab/lrsomatic** is a robust bioinformatics pipeline designed for processing and analyzing **somatic DNA sequencing** data for long-read sequencing technologies from **Oxford Nanopore** and **PacBio**. It supports both canonical base DNA and modified base calling, including specialized applications such as **Fiber-seq**.\n\nThis **end-to-end pipeline** handles the entire workflow \u2014 **from raw read processing and alignment, to comprehensive somatic variant calling**, including single nucleotide variants, indels, structural variants, copy number alterations, and modified bases.\n\nIt can be run in both **matched tumour-normal** and **tumour-only mode**, offering flexibility depending on the users study design.\n\nDeveloped using **Nextflow DSL2**, it offers high portability and scalability across diverse computing environments. By leveraging Docker or Singularity containers, installation is streamlined and results are highly reproducible. Each process runs in an isolated container, simplifying dependency management and updates. Where applicable, pipeline components are sourced from **nf-core/modules**, promoting reuse, interoperability, and consistency within the broader Nextflow and nf-core ecosystems.\n\n## Pipeline summary\n\n![image](./assets/lrsomatic_1.0.png)\n\n**1) Pre-processing:**\n\na. Raw read QC ([`cramino`](https://github.com/wdecoster/cramino))\n\nb. Alignment to the reference genome ([`minimap2`](https://github.com/lh3/minimap2))\n\nc. Post alignment QC ([`cramino`](https://github.com/wdecoster/cramino), [`samtools idxstats`](https://github.com/samtools/samtools), [`samtools flagstats`](https://github.com/samtools/samtools), [`samtools stats`](https://github.com/samtools/samtools))\n\nd. Specific for calling modified base calling ([`Modkit`](https://github.com/nanoporetech/modkit), [`Fibertools`](https://github.com/fiberseq/fibertools-rs))\n\n**2i) Matched mode: small variant calling:**\n\na. 
Calling Germline SNPs ([`Clair3`](https://github.com/HKU-BAL/Clair3))\n\nb. Phasing and Haplotagging the SNPs in the normal and tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\nc. Calling somatic SNVs ([`ClairS`](https://github.com/HKU-BAL/ClairS))\n\n**2ii) Tumour only mode: small variant calling:**\n\na. Calling Germline SNPs and somatic SNVs ([`ClairS-TO`](https://github.com/HKU-BAL/ClairS-TO))\n\nb. Phasing and Haplotagging germline SNPs in tumour BAM ([`LongPhase`](https://github.com/twolinin/longphase))\n\n**3) Large variant calling:**\n\na. Somatic structural variant calling ([`Severus`](https://github.com/KolmogorovLab/Severus))\n\nb. Copy number alterion calling; long read version of ([`ASCAT`](https://github.com/VanLoo-lab/ascat))\n\n**4) Annotation:**\n\na. Small variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\nb. Structural variant annotation ([`VEP`](https://github.com/Ensembl/ensembl-vep))\n\n\n\n## Usage\n\n> [!NOTE]\n> If you are new to Nextflow and nf-core, please refer to [this page](https://nf-co.re/docs/usage/installation) on how to set-up Nextflow. Make sure to [test your setup](https://nf-co.re/docs/usage/introduction#how-to-run-a-pipeline) with `-profile test` before running the workflow on actual data.\n\nFirst prepare a samplesheet with your input data that looks as follows:\n\n```csv\nsample,bam_tumor,bam_normal,platform,sex,fiber\nsample1,tumour.bam,normal.bam,ont,female,n\nsample2,tumour.bam,,ont,female,y\nsample3,tumour.bam,,pb,male,n\nsample4,tumour.bam,normal.bam,pb,male,y\n```\n\nEach row represents a sample. The bam files should always be unaligned bam files. All fields except for `bam_normal` are required. If `bam_normal` is empty, the pipeline will run in tumour only mode. `platform` should be either `ont` or `pb` for Oxford Nanopore Sequencing or PacBio sequencing, respectively. `sex` refers to the biological sex of the sample and should be either `female` or `male`. 
Finally, `fiber` specifies whether your sample is Fiber-seq data or not and should have either `y` for Yes or `n` for No.\n\nNow, you can run the pipeline using:\n\n```bash\nnextflow run IntGenomicsLab/lrsomatic \\\n -profile \\\n --input samplesheet.csv \\\n --outdir \n```\n\nMore detail is given in our [usage documentation](/docs/usage.md)\n\n> [!WARNING]\n> Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration _**except for parameters**_; see [docs](https://nf-co.re/docs/usage/getting_started/configuration#custom-configuration-files).\n\n## Credits\n\nIntGenomicsLab/lr_somatic was originally written by Luuk Harbers, Robert Forsyth, Alexandra Pan\u010d\u00edkov\u00e1, Marios Eftychiou, Ruben Cools, Laurens Lambrechts, and Jonas Demeulemeester.\n\n## Pipeline output\n\nThis pipeline produces a series of different output files. The main output is an aligned and phased tumour bam file. This bam file can be used by any typical downstream tool that uses bam files as input. Furthermore, we have sample-specific QC outputs from `cramino` (fastq), `cramino` (bam), `mosdepth`, `samtools` (stats/flagstat/idxstats), and optionally `fibertools`. Finally, we have a `multiqc` report from that combines the output from `mosdepth` and `samtools` into one html report.\n\nBesides QC and the aligned and phased bam file, we have output from (structural) variant and copy number callers, of which some are optional. The output from these variant callers can be found in their respective folders. For small and structural variant callers (`clairS`, `clairS-TO`, and `severus`) these will contain, among others, `vcf` files with called variants. 
For `ascat` these contain files with final copy number information and plots of the copy number profiles.\n\nExample output directory structure:\n\n```\n\u251c\u2500\u2500 Sample 1\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500clairS-TO\n\u2502 \u2502 \u251c\u2500\u2500severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u2502\n\u251c\u2500\u2500 Sample 2\n\u2502 \u251c\u2500\u2500 ascat\n\u2502 \u251c\u2500\u2500 bamfiles\n\u2502 \u251c\u2500\u2500 qc\n\u2502 \u2502 \u251c\u2500\u2500 tumor\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u2502 \u251c\u2500\u2500 normal\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_aln\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 cramino_ubam\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 fibertoolsrs\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 mosdepth\n\u2502 \u2502 \u2502 \u251c\u2500\u2500 samtools\n\u2502 \u251c\u2500\u2500 variants\n\u2502 \u2502 \u251c\u2500\u2500 clair3\n\u2502 \u2502 \u251c\u2500\u2500 clairS\n\u2502 \u2502 \u251c\u2500\u2500 severus\n\u2502 \u251c\u2500\u2500 vep\n\u2502 \u2502 \u251c\u2500\u2500 germline\n\u2502 \u2502 \u251c\u2500\u2500 somatic\n\u2502 \u2502 \u251c\u2500\u2500 SVs\n\u251c\u2500\u2500 pipeline_info\n```\n\nmore detail is given in our [output 
documentation](/docs/output.md)\n\n## Contributions and Support\n\nIf you would like to contribute to this pipeline, please see the [contributing guidelines](.github/CONTRIBUTING.md).\n\n## Citations\n\nIf you use `IntGenomicsLab/lrsomatic` for your analysis, please cite it using the following:\n\n> LRSomatic: a highly scalable and robust pipeline for somatic variant calling in long-read sequencing data\n>\n> Robert A. Forsyth*, Luuk Harbers*, Amber Verhasselt, Ana-Luc\u00eda Rocha Iraiz\u00f3s, Sidi Yang, Joris Vande Velde, Christopher Davies, Nischalan Pillay, Laurens Lambrechts, Jonas Demeulemeester\n>\n> bioRxiv 2026.02.26.707772; doi: https://doi.org/10.64898/2026.02.26.707772\n\nAn extensive list of references for the tools used by the pipeline can be found in the [`CITATIONS.md`](CITATIONS.md) file.\n\nThis pipeline uses code and infrastructure developed and maintained by the [nf-core](https://nf-co.re) community, reused here under the [MIT license](https://github.com/nf-core/tools/blob/main/LICENSE).\n\n> **The nf-core framework for community-curated bioinformatics pipelines.**\n>\n> Philip Ewels, Alexander Peltzer, Sven Fillinger, Harshil Patel, Johannes Alneberg, Andreas Wilm, Maxime Ulysse Garcia, Paolo Di Tommaso & Sven Nahnsen.\n>\n> _Nat Biotechnol._ 2020 Feb 13. 
doi: [10.1038/s41587-020-0439-x](https://dx.doi.org/10.1038/s41587-020-0439-x).\n", "hasPart": [ { "@id": "main.nf" diff --git a/subworkflows/local/deepsomatic.nf b/subworkflows/local/deepsomatic.nf new file mode 100644 index 00000000..d1baf584 --- /dev/null +++ b/subworkflows/local/deepsomatic.nf @@ -0,0 +1,62 @@ +include { DEEPSOMATIC_MAKEEXAMPLES } from '../../modules/local/deepsomatic/makeexamples/main' +include { DEEPSOMATIC_CALLVARIANTS } from '../../modules/local/deepsomatic/callvariants/main' +include { DEEPSOMATIC_POSTPROCESSVARIANTS } from '../../modules/local/deepsomatic/postprocessvariants/main' + +workflow DEEPSOMATIC { + take: + ch_input // [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + // normal_bam/bai may be [] for tumor-only mode + ch_intervals // [[:], []] -- empty intervals (genome-wide calling) + ch_fasta // [[:], fasta] + ch_fai // [[:], fai] + ch_gzi // [[:], gzi] -- bgzipped FASTA index (empty if FASTA is not bgzipped) + + main: + + // + // MODULE: DEEPSOMATIC_MAKEEXAMPLES (label: process_high) + // Input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] + // Output: .examples -- [meta, [tfrecord shards...]] -- serialised pileup examples + // .gvcf -- [meta, [gvcf tfrecord shards...]] + // + DEEPSOMATIC_MAKEEXAMPLES(ch_input, ch_fasta, ch_fai, ch_gzi) + + // + // MODULE: DEEPSOMATIC_CALLVARIANTS (label: process_gpu / process_high) + // Input: DEEPSOMATIC_MAKEEXAMPLES.out.examples -- [meta, [tfrecord shards...]] + // Output: .call_variants_tfrecords -- [meta, tfrecord] -- DNN variant call records + // + DEEPSOMATIC_CALLVARIANTS(DEEPSOMATIC_MAKEEXAMPLES.out.examples) + + // Join CALLVARIANTS output with MAKEEXAMPLES gVCF records (both keyed on meta) + // The postprocessing step needs both the DNN calls and the gVCF pileup records + ch_postproc_input = DEEPSOMATIC_CALLVARIANTS.out.call_variants_tfrecords.join( + DEEPSOMATIC_MAKEEXAMPLES.out.gvcf, + failOnMismatch: true + ).map { meta, call_tfrecord, gvcf_tfrecords -> + [meta, 
call_tfrecord, gvcf_tfrecords, [], []] + } + // ch_postproc_input: [meta, call_tfrecord, [gvcf_tfrecords...], [], []] + // trailing [] are for optional candidate positions and haplotype outputs (unused) + + // + // MODULE: DEEPSOMATIC_POSTPROCESSVARIANTS (label: process_medium) + // Input: [meta, call_tfrecord, [gvcf_tfrecords...], [], []] + // Output: .vcf -- [meta, vcf] -- somatic variant calls (VCF) + // .vcf_index -- [meta, tbi] + // .gvcf -- [meta, gvcf] -- genome VCF (all sites) + // .gvcf_index-- [meta, tbi] + // + DEEPSOMATIC_POSTPROCESSVARIANTS( + ch_postproc_input, + ch_fasta, + ch_fai, + ch_gzi + ) + + emit: + vcf = DEEPSOMATIC_POSTPROCESSVARIANTS.out.vcf // [meta, vcf] + vcf_index = DEEPSOMATIC_POSTPROCESSVARIANTS.out.vcf_index // [meta, tbi] + gvcf = DEEPSOMATIC_POSTPROCESSVARIANTS.out.gvcf // [meta, gvcf] + gvcf_index = DEEPSOMATIC_POSTPROCESSVARIANTS.out.gvcf_index // [meta, tbi] +} diff --git a/subworkflows/local/paired/paired_smallvar_germline.nf b/subworkflows/local/paired/paired_smallvar_germline.nf new file mode 100644 index 00000000..2ded1cf6 --- /dev/null +++ b/subworkflows/local/paired/paired_smallvar_germline.nf @@ -0,0 +1,181 @@ +// IMPORT MODULES +include { CLAIR3 } from '../../../modules/local/clair3/main.nf' + +// IMPORT SUBWORKFLOWS +include { DEEPVARIANT } from '../../../subworkflows/nf-core/deepvariant/main.nf' +include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' + +workflow PAIRED_SMALLVAR_GERMLINE { + + take: + normal_bams // [meta, normal_bam, normal_bai] -- normal sample BAMs from T/N pairs + fasta // [[:], fasta] + fai // [[:], fai] + clair3_models // [meta(id=model_name), model_dir] -- downloaded Clair3 model directories + + main: + ch_versions = channel.empty() + germline_vcf = channel.empty() + germline_tbi = channel.empty() + + // COMBINE NORMAL BAMS WITH DOWNLOADED CLAIR3 MODELS + // Clair3 requires the model directory path; models are keyed by model name 
(meta.id) + if(params.germline_var_keep.contains('clair')) { + + // Extract model name from meta.id for combine-by key + clair3_models + .map{ meta, file -> + def clair3_model_name = meta.id + return [meta, clair3_model_name, file] + } + .set{clair3_models} + // clair3_models: [meta(id=model_name), model_name_str, model_dir] + + // Emit [meta, clair3_model_name, bam, bai] to use model_name as the combine key + normal_bams + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, meta.clair3_model, bam, bai ] + } + .set { normal_bams_model } + // normal_bams_model: [meta, clair3_model_name, bam, bai] + // clair3_model_name is the join key used by .combine(clair3_models, by:1) + + // + // MODULE: CLAIR3 (label: process_high) + // Input: [meta, bam, bai, model_dir, platform_str] + // fasta / fai + // Output: .vcf -- [meta, vcf] -- germline SNVs/indels + // .tbi -- [meta, tbi] + // + normal_bams_model + .combine(clair3_models,by:1) // join on clair3_model_name + .map {_clair3_model, meta_bam, bam, bai, _meta_model, model -> + def platform = (meta_bam.platform == 'pb') ? 'hifi' : meta_bam.platform + return [meta_bam, bam, bai, model, platform] + } + .set{ clair3_input_ch } + // clair3_input_ch: [meta, bam, bai, model_dir, platform_str] + // platform_str: 'hifi' for PacBio ('pb' → 'hifi'), otherwise meta.platform (e.g. 
'ont') + + CLAIR3 ( + clair3_input_ch, + fasta, + fai + ) + + CLAIR3.out.vcf + .join(CLAIR3.out.tbi) + .map { meta, vcf , tbi -> + def new_meta = meta + [caller:'clair3'] + return [new_meta, vcf, tbi] + } + .set{clair3_ch} + // clair3_ch: [meta(+caller:'clair3'), vcf, tbi] + } + + // DEEPVARIANT + if(params.germline_var_keep.contains('deepvariant')) { + + // + // SUBWORKFLOW: DEEPVARIANT (nf-core) + // Input: [meta, bam, bai, []] -- [] is empty intervals (genome-wide) + // fasta / fai + // [[:],[]] x2 -- empty PAR/GFF interval files (not used for WGS) + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // + normal_bams + .map {meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + def intervals = [] + return [new_meta, bam, bai, intervals] + } + .set{deepvariant_input_ch} + // deepvariant_input_ch: [meta, bam, bai, []] + + DEEPVARIANT ( + deepvariant_input_ch, + fasta, + fai, + [[:],[]], // PAR regions (not used) + [[:],[]] // GFF annotation (not used) + ) + + DEEPVARIANT.out.vcf + .join(DEEPVARIANT.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepvariant'] + return [new_meta, vcf, tbi] + } + .set{deepvariant_ch} + // deepvariant_ch: [meta(+caller:'deepvariant'), vcf, tbi] + } + + // COMBINE GERMLINE VARIATION + // If both callers requested: run consensus subworkflow; otherwise pass through single-caller output + if (params.germline_var_keep.size() > 1) { + // Mix both caller VCFs into a single channel for GERMLINE_CONSENSUS + clair3_ch + .mix(deepvariant_ch) + .set{combined_germline_ch} + // combined_germline_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + + // SUBWORKFLOW: GERMLINE_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) + // Normalise, annotate with caller ID, intersect, and combine per params + GERMLINE_CONSENSUS( + combined_germline_ch, + fasta, + fai, + 
params.prioritize_caller_germline, + params.germline_var_combine + ) + GERMLINE_CONSENSUS.out.vcf + .join(GERMLINE_CONSENSUS.out.tbi) + .set{ germline_vcf } + // germline_vcf: [meta(+caller from consensus), vcf, tbi] + } + else if (params.germline_var_keep == ['clair']) { + clair3_ch + .set{germline_vcf} + } + else if (params.germline_var_keep == ['deepvariant']) { + deepvariant_ch + .set{germline_vcf} + } + + // Strip 'caller' field from final germline VCF meta (not needed downstream) + germline_vcf + .map{ meta, vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{germline_vcf} + + emit: + germline_vcf // [meta, vcf, tbi] -- final germline VCF (Clair3, DeepVariant, or consensus) +}
diff --git a/subworkflows/local/paired/paired_smallvar_somatic.nf b/subworkflows/local/paired/paired_smallvar_somatic.nf
new file mode 100644
index 00000000..c19553a9
--- /dev/null
+++ b/subworkflows/local/paired/paired_smallvar_somatic.nf
@@ -0,0 +1,177 @@
+// IMPORT MODULES
+include { CLAIRS } from '../../../modules/local/clairs/main.nf'
+include { BCFTOOLS_CONCAT } from '../../../modules/nf-core/bcftools/concat'
+include { BCFTOOLS_SORT } from '../../../modules/nf-core/bcftools/sort'
+
+// IMPORT SUBWORKFLOWS
+include { DEEPSOMATIC } from '../../../subworkflows/local/deepsomatic.nf'
+include { SMALL_VARIANT_CONSENSUS as SOMATIC_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf'
+
+//
+// SUBWORKFLOW: PAIRED_SMALLVAR_SOMATIC
+// Calls somatic small variants on tumor/normal BAM pairs with ClairS and/or DeepSomatic,
+// as selected by params.somatic_var_keep. With both callers selected the per-caller VCFs
+// go through SOMATIC_CONSENSUS; with one caller its VCF is passed through unchanged.
+//
+workflow PAIRED_SMALLVAR_SOMATIC {
+
+    take:
+    tumor_normal_bams // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai]
+    fasta             // [[:], fasta]
+    fai               // [[:], fai]
+
+    main:
+    // Default output: stays empty when params.somatic_var_keep selects no known caller
+    somatic_vcf = channel.empty()
+
+    // Evaluate the caller selection once. The same two flags gate both the calling and the
+    // combining steps, so clairs_ch / deepsomatic_ch are only read if the block that
+    // creates them has run.
+    def run_clairs      = params.somatic_var_keep.contains('clair')
+    def run_deepsomatic = params.somatic_var_keep.contains('deepsomatic')
+
+    // CLAIRS: somatic SNV/indel calling from T/N paired BAMs
+    if (run_clairs) {
+        // Append ClairS model name (from meta) as the last element for CLAIRS module
+        tumor_normal_bams
+            .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
+                return [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai, meta.clairS_model ]
+            }
+            .set { clairs_input }
+        // clairs_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, clairS_model_str]
+
+        //
+        // MODULE: CLAIRS (label: process_high)
+        // Input:  [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, model_str]
+        //         fasta / fai
+        // Output: .vcfs -- [meta, [snv_vcf, indel_vcf]] -- separate SNV and indel VCFs
+        //         .tbi  -- [meta, [snv_tbi, indel_tbi]]
+        //
+        CLAIRS (
+            clairs_input,
+            fasta,
+            fai
+        )
+
+        // CONCAT CLAIRS INDEL AND SNV OUTPUT
+        // ClairS outputs separate SNV and indel VCFs; merge into a single sorted VCF
+        CLAIRS.out.vcfs
+            .join(CLAIRS.out.tbi)
+            .set { clairs_out }
+        // clairs_out: [meta, [snv_vcf, indel_vcf], [snv_tbi, indel_tbi]]
+
+        //
+        // MODULE: BCFTOOLS_CONCAT (label: process_medium)
+        // Input:  [meta, [vcf...], [tbi...]]
+        // Output: .vcf -- [meta, vcf] -- unsorted concatenated SNV+indel VCF
+        //
+        BCFTOOLS_CONCAT (
+            clairs_out
+        )
+
+        //
+        // MODULE: BCFTOOLS_SORT (label: process_medium)
+        // Input:  [meta, vcf]
+        // Output: .vcf -- [meta, vcf] -- coordinate-sorted VCF
+        //         .tbi -- [meta, tbi]
+        //
+        BCFTOOLS_SORT (
+            BCFTOOLS_CONCAT.out.vcf
+        )
+
+        BCFTOOLS_SORT.out.vcf
+            .join(BCFTOOLS_SORT.out.tbi)
+            .map { meta, vcf, tbi ->
+                def new_meta = meta + [caller:'clairs']
+                return [ new_meta, vcf, tbi ]
+            }
+            .set { clairs_ch }
+        // clairs_ch: [meta(+caller:'clairs'), vcf, tbi] -- merged and sorted ClairS somatic VCF
+    }
+
+    // DEEPSOMATIC: somatic variant calling using deep learning T/N model
+    if (run_deepsomatic) {
+        // DeepSomatic expects [normal, tumor] order (opposite of input tuple)
+        tumor_normal_bams
+            .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai ->
+                return [ meta, normal_bam, normal_bai, tumor_bam, tumor_bai ]
+            }
+            .set { deepsomatic_input }
+        // deepsomatic_input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai]
+
+        //
+        // SUBWORKFLOW: DEEPSOMATIC (local)
+        // Input:  [meta, normal_bam, normal_bai, tumor_bam, tumor_bai]
+        //         [[:],[]] -- empty intervals
+        //         fasta / fai / [[:],[]] -- empty GZI
+        // Output: .vcf       -- [meta, vcf]
+        //         .vcf_index -- [meta, tbi]
+        //
+        DEEPSOMATIC (
+            deepsomatic_input,
+            [[:],[]], // intervals (empty = genome-wide)
+            fasta,
+            fai,
+            [[:],[]]  // GZI (empty if FASTA is uncompressed)
+        )
+
+        DEEPSOMATIC.out.vcf
+            .join(DEEPSOMATIC.out.vcf_index)
+            .map { meta, vcf, tbi ->
+                def new_meta = meta + [caller:'deepsomatic']
+                return [ new_meta, vcf, tbi ]
+            }
+            .set { deepsomatic_ch }
+        // deepsomatic_ch: [meta(+caller:'deepsomatic'), vcf, tbi]
+    }
+
+    // COMBINE SOMATIC VARIATION
+    // Both callers ran: run consensus subworkflow; otherwise pass through single-caller output
+    if (run_clairs && run_deepsomatic) {
+        clairs_ch
+            .mix(deepsomatic_ch)
+            .set { combine_somatic_ch }
+        // combine_somatic_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample
+
+        // SUBWORKFLOW: SOMATIC_CONSENSUS (SMALL_VARIANT_CONSENSUS alias)
+        SOMATIC_CONSENSUS (
+            combine_somatic_ch,
+            fasta,
+            fai,
+            params.prioritize_caller_somatic,
+            params.somatic_var_combine
+        )
+
+        SOMATIC_CONSENSUS.out.vcf
+            .join(SOMATIC_CONSENSUS.out.tbi)
+            .set { somatic_vcf }
+        // somatic_vcf: [meta, vcf, tbi] as emitted by SOMATIC_CONSENSUS
+    }
+    else if (run_clairs) {
+        clairs_ch
+            .set { somatic_vcf }
+    }
+    else if (run_deepsomatic) {
+        deepsomatic_ch
+            .set { somatic_vcf }
+    }
+
+    // Keep only the listed meta keys before emitting; this drops 'caller'
+    somatic_vcf
+        .map { meta, vcf, tbi ->
+            def new_meta = meta.subMap('id',
+                                        'paired_data',
+                                        'platform',
+                                        'sex',
+                                        'fiber',
+                                        'clair3_model',
+                                        'clairS_model',
+                                        'clairSTO_model',
+                                        'kinetics')
+            return [ new_meta, vcf, tbi ]
+        }
+        .set { somatic_vcf }
+
+    emit:
+    somatic_vcf // [meta, vcf, tbi] -- final somatic VCF (ClairS, DeepSomatic, or consensus)
+}
diff --git
a/subworkflows/local/phasing_haplotyping.nf b/subworkflows/local/phasing_haplotyping.nf
new file mode 100644
index 00000000..832d196d
--- /dev/null
+++ b/subworkflows/local/phasing_haplotyping.nf
@@ -0,0 +1,248 @@
+// Import modules
+include { LONGPHASE_PHASE as LONGPHASE_PHASE_GERMLINE } from '../../modules/nf-core/longphase/phase/main.nf'
+include { LONGPHASE_PHASE as LONGPHASE_PHASE_SOMATIC } from '../../modules/nf-core/longphase/phase/main.nf'
+include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf'
+include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_GERMLINE } from '../../modules/local/longphase/modcall/main.nf'
+include { LONGPHASE_MODCALL as LONGPHASE_MODCALL_SOMATIC } from '../../modules/local/longphase/modcall/main.nf'
+include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf'
+include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat/main'
+include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main'
+
+
+//
+// SUBWORKFLOW: PHASING_HAPLOTYPING
+// Phases germline and somatic small variants with Longphase, then haplotags every input
+// BAM with the phased germline VCF and indexes the result. Base-modification calls from
+// LONGPHASE_MODCALL_* are added to phasing and haplotagging unless params.skip_modcall is set.
+//
+workflow PHASING_HAPLOTYPING {
+    take:
+    tumor_normal_bams // [meta, bam, bai] -- all samples: tumor, normal, and tumor-only
+    germline_vcf      // [meta, vcf, tbi] -- germline small variants (from PAIRED_SMALLVAR_GERMLINE or TUMORONLY_SMALLVAR)
+    somatic_vcf       // [meta, vcf, tbi] -- somatic small variants (from PAIRED_SMALLVAR_SOMATIC or TUMORONLY_SMALLVAR)
+    fasta             // [[:], fasta]
+    fai               // [[:], fai]
+
+    main:
+
+    // Keys kept whenever meta is reduced to a join key. Anything not listed (notably
+    // 'type') is dropped, so BAM channels can be joined with the VCF channels.
+    def meta_keys = [
+        'id',
+        'paired_data',
+        'platform',
+        'sex',
+        'fiber',
+        'clair3_model',
+        'clairS_model',
+        'clairSTO_model',
+        'kinetics'
+    ]
+
+    // SPLIT INTO PAIRED AND TUMOR ONLY on the truthiness of meta.paired_data
+    tumor_normal_bams
+        .branch { meta, _bam, _bai ->
+            paired: meta.paired_data
+            tumor_only: !meta.paired_data
+        }
+        .set { branched_bams }
+    // branched_bams.paired:     [meta, bam, bai]
+    // branched_bams.tumor_only: [meta, bam, bai]
+
+    // Tumor-only samples with meta reduced to meta_keys
+    branched_bams.tumor_only
+        .map { meta, bam, bai -> [ meta.subMap(meta_keys), bam, bai ] }
+        .set { tumor_only_ch }
+    // tumor_only_ch: [meta (no type), bam, bai]
+
+    // Split paired samples into normal and tumor streams by meta.type
+    branched_bams.paired
+        .branch { meta, _bam, _bai ->
+            normal: meta.type == "normal"
+            tumor: meta.type == "tumor"
+        }
+        .set { paired_ch_branched }
+
+    paired_ch_branched.normal
+        .map { meta, bam, bai -> [ meta.subMap(meta_keys), bam, bai ] }
+        .set { paired_normal_ch }
+    // paired_normal_ch: [meta (no type), bam, bai]
+
+    paired_ch_branched.tumor
+        .map { meta, bam, bai -> [ meta.subMap(meta_keys), bam, bai ] }
+        .set { paired_tumor_ch }
+    // paired_tumor_ch: [meta (no type), bam, bai]
+
+    // BAMs phased against the germline VCF: paired normals plus tumor-only samples
+    tumor_only_ch
+        .mix(paired_normal_ch)
+        .set { normal_bams_w_tumoronly_ch }
+
+    // BAMs phased against the somatic+germline VCF: paired tumors plus tumor-only samples
+    tumor_only_ch
+        .mix(paired_tumor_ch)
+        .set { tumor_bams_ch }
+
+    // MODCALL: base-modification calling on both BAM sets (skipped with params.skip_modcall)
+    // NOTE(review): tumor-only BAMs are in both input channels, so they are processed by
+    // both LONGPHASE_MODCALL_GERMLINE and LONGPHASE_MODCALL_SOMATIC -- confirm this is intended.
+    if (!params.skip_modcall) {
+        // Output used below: .mod_vcf -- [meta, vcf]
+        LONGPHASE_MODCALL_GERMLINE (
+            normal_bams_w_tumoronly_ch,
+            fasta,
+            fai
+        )
+
+        // Output used below: .mod_vcf -- [meta, vcf]
+        LONGPHASE_MODCALL_SOMATIC (
+            tumor_bams_ch,
+            fasta,
+            fai
+        )
+    }
+
+    // Pair each sample's germline and somatic VCFs so they can be concatenated into the
+    // single VCF used for somatic phasing. The closure parameters are deliberately not
+    // named germline_vcf / somatic_vcf, which would shadow the workflow inputs.
+    germline_vcf
+        .join(somatic_vcf)
+        .map { meta, germ_vcf, germ_tbi, som_vcf, som_tbi ->
+            // somatic listed first, germline second
+            [ meta, [ som_vcf, germ_vcf ], [ som_tbi, germ_tbi ] ]
+        }
+        .set { germline_somatic_pair_ch }
+    // germline_somatic_pair_ch: [meta, [somatic_vcf, germline_vcf], [somatic_tbi, germline_tbi]]
+
+    // Concatenate, then coordinate-sort, the paired VCFs
+    BCFTOOLS_CONCAT ( germline_somatic_pair_ch )
+    BCFTOOLS_SORT ( BCFTOOLS_CONCAT.out.vcf )
+
+    BCFTOOLS_SORT.out.vcf
+        .set { germline_somatic_vcfs }
+    // germline_somatic_vcfs: [meta, vcf] -- sorted combined somatic+germline VCF
+
+    // PHASING INPUTS: [meta, bam, bai, vcf, svs, mods]
+    // svs is always [] here; mods is the modcall VCF, or [] when modcall is skipped
+    if (!params.skip_modcall) {
+        normal_bams_w_tumoronly_ch
+            .join(germline_vcf)
+            .join(LONGPHASE_MODCALL_GERMLINE.out.mod_vcf)
+            .map { meta, bam, bai, vcf, _tbi, mods -> [ meta, bam, bai, vcf, [], mods ] }
+            .set { longphase_phase_germline_input_ch }
+
+        tumor_bams_ch
+            .join(germline_somatic_vcfs)
+            .join(LONGPHASE_MODCALL_SOMATIC.out.mod_vcf)
+            .map { meta, bam, bai, vcf, mods -> [ meta, bam, bai, vcf, [], mods ] }
+            .set { longphase_phase_somatic_input_ch }
+    }
+    else {
+        normal_bams_w_tumoronly_ch
+            .join(germline_vcf)
+            .map { meta, bam, bai, vcf, _tbi -> [ meta, bam, bai, vcf, [], [] ] }
+            .set { longphase_phase_germline_input_ch }
+
+        tumor_bams_ch
+            .join(germline_somatic_vcfs)
+            .map { meta, bam, bai, vcf -> [ meta, bam, bai, vcf, [], [] ] }
+            .set { longphase_phase_somatic_input_ch }
+    }
+
+    // Germline phasing: normal/tumor-only BAMs + germline VCF
+    // Outputs used below: .snv_vcf -- [meta, vcf], .snv_vcf_index -- [meta, index]
+    LONGPHASE_PHASE_GERMLINE (
+        longphase_phase_germline_input_ch,
+        fasta,
+        fai
+    )
+
+    LONGPHASE_PHASE_GERMLINE.out.snv_vcf
+        .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf_index)
+        .set { phased_germline_vcf }
+    // phased_germline_vcf: [meta, vcf, tbi]
+
+    // Somatic phasing: tumor/tumor-only BAMs + combined somatic+germline VCF
+    LONGPHASE_PHASE_SOMATIC (
+        longphase_phase_somatic_input_ch,
+        fasta,
+        fai
+    )
+
+    LONGPHASE_PHASE_SOMATIC.out.snv_vcf
+        .join(LONGPHASE_PHASE_SOMATIC.out.snv_vcf_index)
+        .set { phased_somatic_vcf }
+    // phased_somatic_vcf: [meta, vcf, tbi]
+
+    // HAPLOTAG INPUTS: [meta(+type), bam, bai, phased_germline_vcf, svs, mods]
+    // Every sample is joined with the phased germline VCF; 'type' is re-added to meta
+    // afterwards so tumor and normal items stay distinguishable.
+    if (!params.skip_modcall) {
+        // Reduce the germline modcall meta to meta_keys so it matches the BAM channel keys
+        LONGPHASE_MODCALL_GERMLINE.out.mod_vcf
+            .map { meta, mods -> [ meta.subMap(meta_keys), mods ] }
+            .set { modcall_vcf_ch }
+
+        tumor_only_ch
+            .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf)
+            .join(modcall_vcf_ch)
+            .map { meta, bam, bai, vcf, mods -> [ meta + [type : "tumor"], bam, bai, vcf, [], mods ] }
+            .set { tumor_only_haplotag_ch }
+
+        paired_tumor_ch
+            .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf)
+            .join(modcall_vcf_ch)
+            .map { meta, bam, bai, vcf, mods -> [ meta + [type : "tumor"], bam, bai, vcf, [], mods ] }
+            .set { paired_tumor_haplotag_ch }
+
+        paired_normal_ch
+            .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf)
+            .join(modcall_vcf_ch)
+            .map { meta, bam, bai, vcf, mods -> [ meta + [type : "normal"], bam, bai, vcf, [], mods ] }
+            .set { paired_normal_haplotag_ch }
+    }
+    else {
+        tumor_only_ch
+            .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf)
+            .map { meta, bam, bai, vcf -> [ meta + [type : "tumor"], bam, bai, vcf, [], [] ] }
+            .set { tumor_only_haplotag_ch }
+
+        paired_tumor_ch
+            .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf)
+            .map { meta, bam, bai, vcf -> [ meta + [type : "tumor"], bam, bai, vcf, [], [] ] }
+            .set { paired_tumor_haplotag_ch }
+
+        paired_normal_ch
+            .join(LONGPHASE_PHASE_GERMLINE.out.snv_vcf)
+            .map { meta, bam, bai, vcf -> [ meta + [type : "normal"], bam, bai, vcf, [], [] ] }
+            .set { paired_normal_haplotag_ch }
+    }
+
+    // All sample types are haplotagged by a single LONGPHASE_HAPLOTAG invocation
+    tumor_only_haplotag_ch
+        .mix(paired_tumor_haplotag_ch)
+        .mix(paired_normal_haplotag_ch)
+        .set { longphase_haplotag_input_ch }
+
+    // Output used below: .bam -- [meta, bam]
+    LONGPHASE_HAPLOTAG (
+        longphase_haplotag_input_ch,
+        fasta,
+        fai
+    )
+
+    // Index the haplotagged BAMs; output used below: .bai -- [meta, bai]
+    SAMTOOLS_INDEX (
+        LONGPHASE_HAPLOTAG.out.bam
+    )
+
+    LONGPHASE_HAPLOTAG.out.bam
+        .join(SAMTOOLS_INDEX.out.bai)
+        .set { tumor_normal_hapbams_ch }
+    // tumor_normal_hapbams_ch: [meta, bam, bai]
+
+    emit:
+    tumor_normal_hapbams_ch // [meta, bam, bai] -- haplotagged, indexed BAMs for all samples
+    phased_germline_vcf     // [meta, vcf, tbi] -- phased germline VCF
+    phased_somatic_vcf      // [meta, vcf, tbi] -- phased somatic (+ germline) VCF
+}
diff --git a/subworkflows/local/prepare_annotation.nf b/subworkflows/local/prepare_annotation.nf index 8771680d..f6b98e78 100644 --- a/subworkflows/local/prepare_annotation.nf +++ b/subworkflows/local/prepare_annotation.nf @@ -3,12 +3,12 @@ include {ENSEMBLVEP_DOWNLOAD } from '../../modules/nf-core/ensemblvep/download/m workflow PREPARE_ANNOTATION { take: - vep_cache - vep_cache_version - vep_genome - vep_args - vep_species - download_vep_cache + vep_cache // path: local VEP cache directory (or S3
annotation-cache URL) + vep_cache_version // int: VEP cache version (e.g. 110) + vep_genome // str: genome assembly string (e.g. "GRCh38") + vep_args // str: extra VEP CLI arguments (parsed to detect --merged / --refseq) + vep_species // str: species name (e.g. "homo_sapiens") + download_vep_cache // bool: if true, download cache via ENSEMBLVEP_DOWNLOAD instead of using local path main: @@ -16,11 +16,16 @@ workflow PREPARE_ANNOTATION { ensemblvep_cache = channel.empty() // - // MODULE: ENSEMBLVEP_DOWNLOAD + // MODULE: ENSEMBLVEP_DOWNLOAD (label: process_medium) + // Only runs when params.download_vep_cache == true + // Input: vep_download_info -- [[:], vep_genome, vep_species, vep_cache_version] + // Output: .cache -- downloaded and extracted VEP cache directory // if (download_vep_cache) { + // Build input tuple: empty meta + genome/species/version for ENSEMBLVEP_DOWNLOAD vep_download_info = channel.of([[],vep_genome, vep_species, vep_cache_version]) + // vep_download_info: [[:], genome_str, species_str, cache_version_int] ENSEMBLVEP_DOWNLOAD ( vep_download_info @@ -31,6 +36,8 @@ workflow PREPARE_ANNOTATION { } else { + // Validate that the local cache directory exists and resolve the correct subdirectory + // The annotation-cache S3 bucket uses a version-prefixed path; local paths do not def vep_annotation_cache_key = (vep_cache == "s3://annotation-cache/vep_cache/") ? "${vep_cache_version}_${vep_genome}/" : "" def vep_species_suffix = vep_args.contains("--merged") ? '_merged' : (vep_args.contains("--refseq") ? 
'_refseq' : '') def vep_cache_dir = "${vep_annotation_cache_key}${vep_species}${vep_species_suffix}/${vep_cache_version}_${vep_genome}" @@ -43,11 +50,13 @@ workflow PREPARE_ANNOTATION { } } + // Collect the resolved cache root as a channel value ensemblvep_cache = channel.fromPath(file("${vep_cache}/${vep_annotation_cache_key}"), checkIfExists: true).collect() } + // ensemblvep_cache: path (or list-of-paths) to the VEP cache root directory emit: - vep_cache = ensemblvep_cache + vep_cache = ensemblvep_cache // path -- VEP cache directory (downloaded or validated local) versions = ch_versions } diff --git a/subworkflows/local/prepare_reference_files.nf b/subworkflows/local/prepare_reference_files.nf index 324815e2..efc867d9 100644 --- a/subworkflows/local/prepare_reference_files.nf +++ b/subworkflows/local/prepare_reference_files.nf @@ -13,13 +13,13 @@ include { WGET } from '../../modules/nf-core/wget/main workflow PREPARE_REFERENCE_FILES { take: - fasta - ascat_alleles - ascat_loci - ascat_loci_gc - ascat_loci_rt - basecall_meta - clair3_modelMap + fasta // str: path to reference FASTA (may be .gz) + ascat_alleles // str: path to ASCAT allele files (directory or .zip), or null + ascat_loci // str: path to ASCAT loci files (directory or .zip), or null + ascat_loci_gc // str: path to ASCAT GC correction file (.zip or direct), or null + ascat_loci_rt // str: path to ASCAT RT correction file (.zip or direct), or null + basecall_meta // [meta, basecall_model_str, kinetics_str] -- from METAEXTRACT per sample + clair3_modelMap // Map -- used to resolve download URLs main: ch_versions = channel.empty() @@ -29,20 +29,25 @@ workflow PREPARE_REFERENCE_FILES { gc_file = channel.empty() rt_file = channel.empty() - // Check if fasta and gtf are zipped + // Decompress FASTA if gzipped; pass through as-is if already uncompressed if (fasta.endsWith('.gz')){ + // + // MODULE: UNZIP_FASTA (PIGZ_UNCOMPRESS alias; label: process_medium) + // Input: [[:], fasta.gz] + // Output: .file -- 
[[:], fasta] -- decompressed FASTA + // UNZIP_FASTA( [ [:], fasta ]) ch_prepared_fasta = UNZIP_FASTA.out.file ch_versions = ch_versions.mix(UNZIP_FASTA.out.versions) } else { - ch_prepared_fasta = [ [:], fasta ] + ch_prepared_fasta = channel.value([ [:], fasta ]) } - // ch_prepared_fasta: [[:], fasta_path] -- empty meta; uncompressed if input was .gz - - // if clair3 model is specified, then download that - // otherwise use info in bam header and download that + // ch_prepared_fasta: [[:], fasta_path] -- empty meta; uncompressed FASTA + // Build Clair3 model download URLs from basecall metadata + // Priority: explicit meta.clair3_model param > auto-detected from BAM header via modelMap + // PacBio models from HKU mirror; ONT models from Oxford Nanopore CDN basecall_meta.map { meta, basecall_model_meta, _kinetics_meta -> def id_new = basecall_model_meta ? clair3_modelMap.get(basecall_model_meta) : basecall_model_meta def meta_new = [id: id_new] @@ -51,31 +56,37 @@ workflow PREPARE_REFERENCE_FILES { def url = "${download_prefix}/${model}.tar.gz" return [ meta_new, url ] } - .unique() + .unique() // deduplicate: multiple samples with the same basecall model share one download .set{ clair3_model_urls } - // [meta(id=clair3_model_id), download_url] -- one item per unique Clair3 model; deduplicated with .unique() + // clair3_model_urls: [meta(id=clair3_model_name), download_url_str] + // one item per unique Clair3 model needed across all samples // - // MODULE: Download model + // MODULE: WGET (label: process_single) + // Input: [meta, url_str] -- model name (id) + download URL + // Output: .outfile -- [meta, tarball] -- downloaded .tar.gz model archive // - WGET ( clair3_model_urls ) ch_versions = ch_versions.mix(WGET.out.versions) // - // MODULE: Untar model + // MODULE: UNTAR (label: process_single) + // Input: WGET.out.outfile -- [meta, tarball] + // Output: .untar -- [meta, model_dir] -- extracted Clair3 model directory // - UNTAR ( WGET.out.outfile ) 
UNTAR.out.untar.set { downloaded_clair3_models } - // [meta(id=clair3_model_id), model_dir] -- extracted Clair3 model directory + // downloaded_clair3_models: [meta(id=clair3_model_name), model_dir] // - // MODULE: Index the fasta + // MODULE: SAMTOOLS_FAIDX (label: process_single) + // Input: [[:], fasta, []] -- empty meta + empty regions file (index full FASTA) + // false -- do not write fai to stdout + // Output: .fai -- [[:], fai_path] // SAMTOOLS_FAIDX ( ch_prepared_fasta.map { meta, fa -> [meta, fa, []] }, @@ -86,51 +97,66 @@ workflow PREPARE_REFERENCE_FILES { // ch_prepared_fai: [[:], fai_path] -- empty meta // - // Prepare ASCAT files + // Prepare ASCAT reference files + // Each file set can be provided as a .zip archive or a plain directory/file path + // All ASCAT outputs are flat file collections (no meta tuple) for use with ASCAT module // - - // prepare ascat and controlfreec reference files if ( !params.skip_ascat ) { + // Allele files: per-chromosome SNP allele frequency files (used for LogR/BAF calculation) if (!ascat_alleles) allele_files = channel.empty() else if (ascat_alleles.endsWith(".zip")) { + // MODULE: UNZIP_ALLELES (UNZIP alias; label: process_single) + // Input: [meta(id=basename), [zip_file]] -- collected zip + // Output: .unzipped_archive -- [meta, dir] -- extracted directory; flatMap lists individual files UNZIP_ALLELES(channel.fromPath(file(ascat_alleles)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) allele_files = UNZIP_ALLELES.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // allele_files: [path, path, ...] 
-- all per-chromosome allele files collected ch_versions = ch_versions.mix(UNZIP_ALLELES.out.versions) } else allele_files = channel.fromPath(ascat_alleles).collect() + // Loci files: per-chromosome SNP loci positions if (!ascat_loci) loci_files = channel.empty() else if (ascat_loci.endsWith(".zip")) { + // MODULE: UNZIP_LOCI (UNZIP alias; label: process_single) UNZIP_LOCI(channel.fromPath(file(ascat_loci)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) loci_files = UNZIP_LOCI.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // loci_files: [path, path, ...] -- all per-chromosome loci files collected ch_versions = ch_versions.mix(UNZIP_LOCI.out.versions) } else loci_files = channel.fromPath(ascat_loci).collect() + // GC correction file: genome-wide GC content per locus (optional) if (!ascat_loci_gc) gc_file = channel.value([]) else if ( ascat_loci_gc.endsWith(".zip") ) { + // MODULE: UNZIP_GC (UNZIP alias; label: process_single) UNZIP_GC(channel.fromPath(file(ascat_loci_gc)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) gc_file = UNZIP_GC.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // gc_file: [path, ...] -- GC correction file(s) collected ch_versions = ch_versions.mix(UNZIP_GC.out.versions) } else gc_file = channel.fromPath(ascat_loci_gc).collect() + // Replication timing correction file: RT correction per locus (optional) if (!ascat_loci_rt) rt_file = channel.value([]) else if (ascat_loci_rt.endsWith(".zip")) { + // MODULE: UNZIP_RT (UNZIP alias; label: process_single) UNZIP_RT(channel.fromPath(file(ascat_loci_rt)).collect().map{ it -> [ [ id:it[0].baseName ], it ] }) rt_file = UNZIP_RT.out.unzipped_archive.flatMap { it -> it[1].listFiles() }.collect() + // rt_file: [path, ...] 
-- RT correction file(s) collected ch_versions = ch_versions.mix(UNZIP_RT.out.versions) } else rt_file = channel.fromPath(ascat_loci_rt).collect() } emit: - prepped_fasta = ch_prepared_fasta // [[:], fasta_path] - prepped_fai = ch_prepared_fai // [[:], fai_path] + prepped_fasta = ch_prepared_fasta // [[:], fasta_path] -- uncompressed reference FASTA + prepped_fai = ch_prepared_fai // [[:], fai_path] -- samtools FAI index // ASCAT reference files -- flat file collections (no meta tuple wrapper) - allele_files - loci_files - gc_file - rt_file + // Each is a list of paths collected into a single channel value + allele_files // [path, ...] -- per-chromosome allele frequency files + loci_files // [path, ...] -- per-chromosome loci position files + gc_file // [path, ...] -- GC correction file ([] if not provided) + rt_file // [path, ...] -- replication timing correction file ([] if not provided) - downloaded_clair3_models // [meta(id=clair3_model_id), model_dir] + downloaded_clair3_models // [meta(id=clair3_model_name), model_dir] versions = ch_versions } diff --git a/subworkflows/local/small_variant_consensus.nf b/subworkflows/local/small_variant_consensus.nf new file mode 100644 index 00000000..9befdb2c --- /dev/null +++ b/subworkflows/local/small_variant_consensus.nf @@ -0,0 +1,218 @@ + +include { BCFTOOLS_NORM } from '../../modules/nf-core/bcftools/norm/main' +include { BCFTOOLS_ISEC } from '../../modules/nf-core/bcftools/isec/main' +include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query/main' +include { BCFTOOLS_ANNOTATE } from '../../modules/nf-core/bcftools/annotate/main' +include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat/main' +include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort/main' + + + +workflow SMALL_VARIANT_CONSENSUS { + take: + mixed_vcfs // [meta(+caller field), vcf, tbi] -- one item per caller per sample + // meta.caller is one of: 'clair3', 'clairs-to', 'clairs', 'deepvariant', 'deepsomatic' + fasta 
// [[:], fasta] + fai // [[:], fai] + prioritize_caller // str: which caller's calls take priority ('deepvariant'/'deepsomatic' or 'clair') + combine_method // str: 'consensus' (intersection only) or 'all' (intersection + private calls from priority caller) + + main: + + // + // MODULE: BCFTOOLS_NORM (label: process_medium) + // Input: [meta, vcf, tbi] -- per-caller VCF + // Output: .vcf -- [meta, vcf] -- left-aligned, normalised VCF + // .tbi -- [meta, tbi] + // + BCFTOOLS_NORM(mixed_vcfs, fasta) + + BCFTOOLS_NORM.out.vcf + .join(BCFTOOLS_NORM.out.tbi) + .set {normalized_vcfs} + // normalized_vcfs: [meta(+caller), vcf, tbi] -- normalised per-caller VCF + + // + // MODULE: BCFTOOLS_QUERY (label: process_single) + // Extract variant positions to build a caller-annotation file used by BCFTOOLS_ANNOTATE + // Input: [meta, vcf, tbi] -- normalised VCF + // Output: .output -- [meta, tsv] -- tab-separated annotation file (CHROM POS REF ALT CALLER) + // .index -- [meta, tbi] + // + BCFTOOLS_QUERY(normalized_vcfs, [], [], []) + + // Prepare BCFTOOLS_ANNOTATE input: VCF + caller-name annotation file + normalized_vcfs + .join(BCFTOOLS_QUERY.out.output) + .join(BCFTOOLS_QUERY.out.index) + .map{ meta, vcf, tbi, annotations, annotations_index -> + def columns = [] // no extra column specs + def header_lines = [] // no extra header lines + def rename_chrs = [] // no chromosome renaming + return [ meta, vcf, tbi, annotations, annotations_index, columns, header_lines, rename_chrs ] + } + .set{annotate_input} + // annotate_input: [meta, vcf, tbi, annotations_tsv, annotations_tbi, [], [], []] + + // + // MODULE: BCFTOOLS_ANNOTATE (label: process_medium) + // Adds CALLER INFO field to each VCF record using the query-generated annotation file + // Input: [meta, vcf, tbi, annotations_tsv, annotations_tbi, [], [], []] + // Output: .vcf -- [meta, vcf] -- VCF with CALLER annotation added + // .tbi -- [meta, tbi] + // + BCFTOOLS_ANNOTATE(annotate_input) + + BCFTOOLS_ANNOTATE.out.vcf +
.join(BCFTOOLS_ANNOTATE.out.tbi) + .set{annotated_vcfs} + // annotated_vcfs: [meta(+caller), vcf, tbi] -- VCF with CALLER INFO tag + + // Branch annotated VCFs by caller family for the intersection step + annotated_vcfs + .branch { meta, _vcfs, _tbi -> + deepvariant: meta.caller in [ 'deepvariant', 'deepsomatic' ] + clair: meta.caller in ['clair3','clairs-to','clairs'] + } + .set{annotated_vcfs_branched} + // annotated_vcfs_branched.deepvariant: [meta(caller=deepvariant/deepsomatic), vcf, tbi] + // annotated_vcfs_branched.clair: [meta(caller=clair3/clairs-to/clairs), vcf, tbi] + + clair_ch = annotated_vcfs_branched.clair + deepvariant_ch = annotated_vcfs_branched.deepvariant + + // Strip 'caller' field from meta before joining so both channels share the same key + clair_ch. + map {meta, vcfs, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'type', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, vcfs, tbi] + } + .set{clair_ch} + // clair_ch: [meta (no caller), vcf, tbi] + + deepvariant_ch + .map {meta, vcfs, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'type', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return [ new_meta, vcfs, tbi] + } + .set{deepvariant_ch} + // deepvariant_ch: [meta (no caller), vcf, tbi] + + // Join DeepVariant and Clair VCFs per sample into a single tuple for BCFTOOLS_ISEC + deepvariant_ch + .join(clair_ch) + .map { meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> + def vcfs = [deepvar_vcf, clair_vcf] + def tbis = [deepvar_tbi, clair_tbi] + return [ meta, vcfs, tbis] + } + .set{mixed_vcfs} + // mixed_vcfs (re-paired): [meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]] + + // Add empty optional fields required by BCFTOOLS_ISEC + mixed_vcfs + .map{ meta, vcfs, tbis -> + def file = [] // no regions file + def target = [] // no target sites + def regions = [] // no 
region string + return [meta, vcfs, tbis, file, target, regions] + } + .set{isec_input} + // isec_input: [meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi], [], [], []] + + // + // MODULE: BCFTOOLS_ISEC (label: process_medium) + // Computes the intersection and private sets for the two callers + // Input: [meta, [vcf1, vcf2], [tbi1, tbi2], [], [], []] + // Output (custom nf-core module outputs): + // .deepvar_consensus_vcf -- [meta, vcf] -- variants called by both callers (DeepVariant record) + // .clair_consensus_vcf -- [meta, vcf] -- variants called by both callers (Clair record) + // .deepvar_private_vcf -- [meta, vcf] -- variants unique to DeepVariant + // .clair_private_vcf -- [meta, vcf] -- variants unique to Clair + // (+ corresponding .tbi outputs for each) + // + BCFTOOLS_ISEC(isec_input) + + if (combine_method == 'consensus') { + // Take only the intersection: variants called by BOTH callers + // Use the record from the prioritized caller + if (prioritize_caller in ['deepvariant', 'deepsomatic']) { + BCFTOOLS_ISEC.out.deepvar_consensus_vcf + .set{vcf} + BCFTOOLS_ISEC.out.deepvar_consensus_tbi + .set{tbi} + } + else if (prioritize_caller == 'clair') { + BCFTOOLS_ISEC.out.clair_consensus_vcf + .set{vcf} + BCFTOOLS_ISEC.out.clair_consensus_tbi + .set{tbi} + } + // vcf/tbi: [meta, vcf/tbi] -- consensus-only calls from the priority caller + } + + else if (combine_method == 'all') { + // Take the intersection PLUS the private calls from the prioritized caller + // (private calls from the non-priority caller are discarded) + if (prioritize_caller in ['deepvariant', 'deepsomatic']) { + // consensus (DeepVariant record) + Clair-private variants, as wired below + // NOTE(review): the block comment above says the priority caller's private calls are kept, but clair_private_* is joined here -- confirm whether deepvar_private_* was intended + BCFTOOLS_ISEC.out.deepvar_consensus_vcf + .join(BCFTOOLS_ISEC.out.deepvar_consensus_tbi) + .join(BCFTOOLS_ISEC.out.clair_private_vcf) + .join(BCFTOOLS_ISEC.out.clair_private_tbi) + .map{ meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> + return[meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]]
+ } + .set{concat_input} + // concat_input: [meta, [consensus_vcf, private_vcf], [consensus_tbi, private_tbi]] + BCFTOOLS_CONCAT(concat_input) + BCFTOOLS_CONCAT.out.vcf + .set{concat_out} + } + else if (prioritize_caller == 'clair') { + // consensus (Clair record) + DeepVariant-private variants, as wired below + // NOTE(review): 'all' is documented as keeping the priority caller's private calls, yet deepvar_private_* is joined here -- confirm whether clair_private_* was intended + BCFTOOLS_ISEC.out.deepvar_private_vcf + .join(BCFTOOLS_ISEC.out.deepvar_private_tbi) + .join(BCFTOOLS_ISEC.out.clair_consensus_vcf) + .join(BCFTOOLS_ISEC.out.clair_consensus_tbi) + .map{ meta, deepvar_vcf, deepvar_tbi, clair_vcf, clair_tbi -> + return[meta, [deepvar_vcf, clair_vcf], [deepvar_tbi, clair_tbi]] + } + .set{concat_input} + // concat_input: [meta, [private_vcf, consensus_vcf], [private_tbi, consensus_tbi]] + BCFTOOLS_CONCAT(concat_input) + BCFTOOLS_CONCAT.out.vcf + .set{concat_out} + } + // concat_out: [meta, vcf] -- unsorted concatenated VCF (consensus + the private set joined above) + BCFTOOLS_SORT(concat_out) + BCFTOOLS_SORT.out.vcf + .set{vcf} + BCFTOOLS_SORT.out.tbi + .set{tbi} + // vcf/tbi: [meta, vcf/tbi] -- sorted combined VCF + } + + emit: + vcf // [meta, vcf] -- final consensus/combined VCF + tbi // [meta, tbi] + +} diff --git a/subworkflows/local/tumor_normal_happhase.nf b/subworkflows/local/tumor_normal_happhase.nf deleted file mode 100644 index b2cb135f..00000000 --- a/subworkflows/local/tumor_normal_happhase.nf +++ /dev/null @@ -1,279 +0,0 @@ -include { CLAIR3 } from '../../modules/local/clair3/main.nf' -include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main.nf' -include { LONGPHASE_HAPLOTAG } from '../../modules/nf-core/longphase/haplotag/main.nf' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' -include { CLAIRS } from '../../modules/local/clairs/main.nf' -include { BCFTOOLS_CONCAT } from '../../modules/nf-core/bcftools/concat' -include { BCFTOOLS_SORT } from '../../modules/nf-core/bcftools/sort' - -workflow TUMOR_NORMAL_HAPPHASE { - take: - mixed_bams - fasta - fai - downloaded_clair3_models - - main: -
- ch_versions = channel.empty() - tumor_normal_severus = channel.empty() - somatic_vep = channel.empty() - germline_vep = channel.empty() - - // Branch input bams in normal and tumour - mixed_bams - .branch{ meta, _bam, _bai -> - normal: meta.type == "normal" - tumor: meta.type == "tumor" - } - .set{ mixed_bams } - - // Get normal bams and add platform/model info for Clair3 usage - // remove type from so that information can be merged easier later - - downloaded_clair3_models - .map{ meta, file -> - def clair3_model = meta.id - return [meta, clair3_model, file] - } - .set{downloaded_clair3_models} - - mixed_bams.normal - .map{ meta, bam, bai -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - clair3_model: meta.clair3_model, - clairS_model: meta.clairS_model, - clairSTO_model: meta.clairSTO_model, - kinetics: meta.kinetics] - return [ new_meta, meta.clair3_model, bam, bai ] - } - .set { normal_bams_model } - // [meta, clair3_model_id, bam, bai] -- keyed by model ID for .combine() with downloaded_clair3_models - - normal_bams_model - .combine(downloaded_clair3_models,by:1) - .map {_clair3_model, meta_bam, bam, bai, _meta_model, model -> - def platform = (meta_bam.platform == 'pb') ? 'hifi' : meta_bam.platform - return [meta_bam, bam, bai, model, platform] - } - .set{ normal_bams } - // [meta, bam, bai, clair3_model_dir, platform] -- type excluded from meta; platform is "hifi" for PacBio - - /* - .map{ basecall_model, meta, bam, bai, meta2, model -> - def platform = (meta.platform == "pb") ? 
"hifi" : "ont" - return [meta, bam, bai, model, platform] - } - */ - - // Get tumour bams - // remove type from so that information can be merged easier later - mixed_bams.tumor - .map{ meta, bam, bai -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - clair3_model: meta.clair3_model, - clairS_model: meta.clairS_model, - clairSTO_model: meta.clairSTO_model, - kinetics: meta.kinetics] - return[new_meta, bam, bai] - } - .set{ tumor_bams } - // [meta, bam, bai] -- type excluded from meta for downstream groupTuple merge - - // - // MODULE: CLAIR3 - // small germline variant calling - - CLAIR3 ( - normal_bams, - fasta, - fai - ) - - // Add germline vcf to normal bams - // remove clair3 model information - - normal_bams - .join(CLAIR3.out.vcf) - .map { meta, bam, bai, _clair3_model, _platform, vcf -> - def svs = [] - def mods = [] - return [meta, bam, bai, vcf, svs, mods] - } - .set{ normal_bams_germlinevcf } - // [meta, bam, bai, germline_vcf, [], []] -- svs and mods are empty placeholders for LONGPHASE_PHASE input - - // - // MODULE: LONGPHASE_PHASE - // - // Phase normals - - LONGPHASE_PHASE ( - normal_bams_germlinevcf, - fasta, - fai - ) - - ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) - - LONGPHASE_PHASE.out.snv_vcf - .map { meta, vcf -> - def extra = [] - return [meta, vcf, extra] - } - .set { germline_vep } - // [meta, clair3_vcf, []] -- germline small variants for VEP annotation - - // Add phased vcf to normal bams - // Add type information back - // both are needed for mixing with the tumor bams - - normal_bams - .join(LONGPHASE_PHASE.out.snv_vcf) - .map { meta, bam, bai, _clair3_model, _platform, vcf -> - def new_meta = meta + [type: "normal"] - def svs = [] - def mods = [] - return[new_meta, bam, bai, vcf, svs, mods] - } - .set{ normal_bams } - // [meta+{type:"normal"}, bam, bai, phased_vcf, [], []] -- type re-added; svs and mods are empty placeholders for 
LONGPHASE_HAPLOTAG - - // Add phased vcf to tumour bams and type information - // mix with the normal bams - - tumor_bams - .join(LONGPHASE_PHASE.out.snv_vcf) - .map { meta, bam, bai, vcf -> - def new_meta = meta + [type: "tumor"] - def svs = [] - def mods = [] - return [new_meta, bam, bai, vcf, svs, mods] - } - .mix(normal_bams) - .set{ mixed_bams_vcf } - // [meta+{type}, bam, bai, phased_normal_vcf, [], []] -- tumor and normal items both carry the same phased normal VCF - - // - // MODULE: LONGPHASE_HAPLOTAG - // - - // haplotag tumor and normal bams with normal vcf files for both - LONGPHASE_HAPLOTAG ( - mixed_bams_vcf, - fasta, - fai - ) - - ch_versions = ch_versions.mix(LONGPHASE_HAPLOTAG.out.versions) - - // Get final tagged bams - LONGPHASE_HAPLOTAG.out.bam - .set{ mixed_hapbams } - // [meta+{type}, haplotagged_bam] - - // - // MODULE: SAMTOOLS_INDEX - // - // index the haplotaged bams - - SAMTOOLS_INDEX ( - mixed_hapbams - ) - - // Add index to channel - mixed_bams_vcf - .join(mixed_hapbams) - .join(SAMTOOLS_INDEX.out.bai) - .set{ mixed_hapbams } - // [meta+{type}, orig_bam, orig_bai, vcf, svs, mods, hapbam, hapbai] - - // Group everything back together in one channel - mixed_hapbams - .map { meta, _bam, _bai, _vcf, _snvs, _mods, hapbam, hapbai -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - clair3_model: meta.clair3_model, - clairS_model: meta.clairS_model, - clairSTO_model: meta.clairSTO_model, - kinetics: meta.kinetics] - return[new_meta, [[type: meta.type], hapbam], [[type: meta.type], hapbai]] - } - .groupTuple(size: 2) - .map{ meta, bam, bai -> - def normal_bam = bam[0][0].type == "normal" ? bam[0][1] : bam[1][1] - def tumor_bam = bam[0][0].type == "tumor" ? bam[0][1] : bam[1][1] - def normal_bai = bai[0][0].type == "normal" ? bai[0][1] : bai[1][1] - def tumor_bai = bai[0][0].type == "tumor" ? 
bai[0][1] : bai[1][1] - // Return channel - return [ meta, tumor_bam, tumor_bai, normal_bam, normal_bai ] - } - .join(LONGPHASE_PHASE.out.snv_vcf) - .join(LONGPHASE_PHASE.out.snv_vcf_index) - .set{tumor_normal_severus} - // [meta, tumor_hapbam, tumor_bai, normal_hapbam, normal_bai, phased_vcf, phased_tbi] - - // Get ClairS input channel - tumor_normal_severus - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf, _tbi -> - return[meta , tumor_bam, tumor_bai, normal_bam, normal_bai, meta.clairS_model] - } - .set { clairs_input } - // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, clairS_model] - - // - // MODULE: CLAIRS - // - - CLAIRS ( - clairs_input, - fasta, - fai - ) - - CLAIRS.out.vcfs - .join(CLAIRS.out.tbi) - .set{clairs_out} - - // - // MODULE: BCFTOOLS_CONCAT - // - - BCFTOOLS_CONCAT ( - clairs_out - ) - - // - // MODULE: BCFTOOLS_SORT - // - - BCFTOOLS_SORT ( - BCFTOOLS_CONCAT.out.vcf - ) - - BCFTOOLS_SORT.out.vcf - .map { meta, vcf -> - def extra = [] - return [meta, vcf, extra] - } - .set { somatic_vep } - // [meta, sorted_clairs_vcf, []] -- somatic small variants (SNV+indel merged) for VEP annotation - - emit: - tumor_normal_severus - somatic_vep - germline_vep - versions = ch_versions - -} diff --git a/subworkflows/local/tumor_only/tumoronly_smallvar.nf b/subworkflows/local/tumor_only/tumoronly_smallvar.nf new file mode 100644 index 00000000..e9b50e2f --- /dev/null +++ b/subworkflows/local/tumor_only/tumoronly_smallvar.nf @@ -0,0 +1,264 @@ +// IMPORT MODULES +include { CLAIRSTO } from '../../../modules/local/clairsto/main.nf' +include { VCFSPLIT } from '../../../modules/local/vcfsplit/main.nf' + +// IMPORT SUBWORKFLOWS +include { DEEPVARIANT } from '../../../subworkflows/nf-core/deepvariant/main.nf' +include { DEEPSOMATIC } from '../../../subworkflows/local/deepsomatic.nf' +include { SMALL_VARIANT_CONSENSUS as GERMLINE_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' +include { SMALL_VARIANT_CONSENSUS as 
SOMATIC_CONSENSUS } from '../../../subworkflows/local/small_variant_consensus.nf' + + +workflow TUMORONLY_SMALLVAR { + + take: + tumor_bams // [meta, tumor_bam, tumor_bai] -- tumor-only aligned BAMs (no matched normal) + fasta // [[:], fasta] + fai // [[:], fai] + pon_channel // [ [pon_vcf_path, ...], [is_population_allele_flag, ...] ] + // used by ClairS-TO to filter germline variants with population allele databases + + main: + + ch_versions = channel.empty() + somatic_vcf = channel.empty() + germline_vcf = channel.empty() + somatic_tbi = channel.empty() + germline_tbi = channel.empty() + + // CLAIRS-TO: somatic AND germline variant calling from tumor-only BAM + // ClairS-TO uses a panel-of-normals / population allele database to separate somatic from germline + // Runs if either somatic or germline clair calling is requested (produces both jointly) + + if(params.somatic_var_keep.contains('clair') || params.germline_var_keep.contains('clair')) { + // Append model name and PoN info to build the full CLAIRSTO input + tumor_bams + .map { meta, bam, bai -> + return [ meta, bam, bai, meta.clairSTO_model] + } + .combine(pon_channel) + .set{ clairsto_input_ch} + // clairsto_input_ch: [meta, bam, bai, clairSTO_model_str, [pon_vcf_paths], [pon_flags]] + + // + // MODULE: CLAIRSTO (label: process_high) + // Input: [meta, bam, bai, model_str, [pon_vcfs], [pon_flags]] + // fasta / fai + // Output: .snv_vcf -- [meta, vcf] -- SNV calls (germline + somatic, unsplit) + // .indel_vcf -- [meta, vcf] -- indel calls (germline + somatic, unsplit) + // + CLAIRSTO ( + clairsto_input_ch, + fasta, + fai + ) + + // SPLIT CLAIRSTO GERMLINE AND SOMATIC VARIATION + // ClairS-TO outputs a combined VCF with FILTER tags indicating somatic/germline status; + // VCFSPLIT separates these into two VCFs + + CLAIRSTO.out.indel_vcf + .join(CLAIRSTO.out.snv_vcf) + .set{ clairsto_combined_vcf } + // clairsto_combined_vcf: [meta, indel_vcf, snv_vcf] + + // + // MODULE: VCFSPLIT (label: process_single) + 
// Input: [meta, indel_vcf, snv_vcf] -- combined ClairS-TO output + // Output: .germline_vcf -- [meta, vcf] -- germline variants only + // .germline_tbi -- [meta, tbi] + // .somatic_vcf -- [meta, vcf] -- somatic variants only + // .somatic_tbi -- [meta, tbi] + // + VCFSPLIT ( + clairsto_combined_vcf + ) + + VCFSPLIT.out.germline_vcf + .join(VCFSPLIT.out.germline_tbi) + .map { meta, vcf, tbi -> + def new_meta = meta + [caller:'clairs-to'] + return [ new_meta, vcf, tbi] + } + .set{clairsto_germline_ch} + // clairsto_germline_ch: [meta(+caller:'clairs-to'), vcf, tbi] -- germline variants + + VCFSPLIT.out.somatic_vcf + .join(VCFSPLIT.out.somatic_tbi) + .map { meta, vcf, tbi -> + def new_meta = meta + [caller:'clairs-to'] + return [ new_meta, vcf, tbi] + } + .set{clairsto_somatic_ch} + // clairsto_somatic_ch: [meta(+caller:'clairs-to'), vcf, tbi] -- somatic variants + } + + // DEEPVARIANT: germline-only variant calling (no somatic mode for tumor-only) + if(params.germline_var_keep.contains('deepvariant')) { + + // + // SUBWORKFLOW: DEEPVARIANT (nf-core) + // Input: [meta, bam, bai, []] -- [] = genome-wide (no interval list) + // fasta / fai / [[:],[]] x2 -- empty ch_gzi (FASTA .gzi index) and ch_par_bed (PAR regions BED) + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // + tumor_bams + .map { meta, bam, bai -> + def intervals = [] + return [meta,bam,bai, intervals] + } + .set{deepvariant_input_ch} + // deepvariant_input_ch: [meta, bam, bai, []] + + DEEPVARIANT ( + deepvariant_input_ch, + fasta, + fai, + [[:],[]], // ch_gzi: FASTA .gzi index (not supplied) + [[:],[]] // ch_par_bed: pseudoautosomal regions BED (not supplied) + ) + + DEEPVARIANT.out.vcf + .join(DEEPVARIANT.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepvariant'] + return [new_meta, vcf, tbi] + } + .set{deepvariant_ch} + // deepvariant_ch: [meta(+caller:'deepvariant'), vcf, tbi] + } + + // COMBINE GERMLINE VARIANTS + // If both callers requested: run consensus; otherwise pass through single-caller output + if (params.germline_var_keep.size() > 1) { +
clairsto_germline_ch + .mix(deepvariant_ch) + .set{combined_germline_ch} + // combined_germline_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + + // SUBWORKFLOW: GERMLINE_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) + GERMLINE_CONSENSUS( + combined_germline_ch, + fasta, + fai, + params.prioritize_caller_germline, + params.germline_var_combine + ) + GERMLINE_CONSENSUS.out.vcf + .join(GERMLINE_CONSENSUS.out.tbi) + .set{germline_vcf} + // germline_vcf: [meta(+caller from consensus), vcf, tbi] + } + else if (params.germline_var_keep == ['clair']) { + clairsto_germline_ch + .set{germline_vcf} + } + else if (params.germline_var_keep == ['deepvariant']) { + deepvariant_ch + .set{germline_vcf} + } + + // DEEPSOMATIC: somatic variant calling in tumor-only mode (no matched normal) + // Normal BAM/BAI are passed as empty lists; DeepSomatic uses the model's internal normal baseline + if(params.somatic_var_keep.contains('deepsomatic')) { + tumor_bams + .map { meta, tumor_bam, tumor_bai -> + def normal_bam = [] + def normal_bai = [] + return [meta,normal_bam,normal_bai,tumor_bam,tumor_bai] + } + .set{deepsomatic_input_ch} + // deepsomatic_input_ch: [meta, [], [], tumor_bam, tumor_bai] + // empty normal_bam/bai signals tumor-only mode to DEEPSOMATIC subworkflow + + // + // SUBWORKFLOW: DEEPSOMATIC (local) + // Input: [meta, [], [], tumor_bam, tumor_bai] -- tumor-only (no normal) + // [[:],[]] / fasta / fai / [[:],[]] + // Output: .vcf -- [meta, vcf] + // .vcf_index -- [meta, tbi] + // + DEEPSOMATIC ( + deepsomatic_input_ch, + [[:],[]], // intervals (empty = genome-wide) + fasta, + fai, + [[:],[]] // GZI (empty if FASTA is uncompressed) + ) + DEEPSOMATIC.out.vcf + .join(DEEPSOMATIC.out.vcf_index) + .map{ meta, vcf, tbi -> + def new_meta = meta + [caller:'deepsomatic'] + return [new_meta, vcf, tbi] + } + .set{deepsomatic_ch} + // deepsomatic_ch: [meta(+caller:'deepsomatic'), vcf, tbi] + } + + // COMBINE SOMATIC VARIATION + if (params.somatic_var_keep.size() > 1) { + 
clairsto_somatic_ch + .mix(deepsomatic_ch) + .set{combined_somatic_ch} + // combined_somatic_ch: [meta(+caller), vcf, tbi] -- one item per caller per sample + + // SUBWORKFLOW: SOMATIC_CONSENSUS (SMALL_VARIANT_CONSENSUS alias) + SOMATIC_CONSENSUS( + combined_somatic_ch, + fasta, + fai, + params.prioritize_caller_somatic, + params.somatic_var_combine + ) + SOMATIC_CONSENSUS.out.vcf + .join(SOMATIC_CONSENSUS.out.tbi) + .set{somatic_vcf} + // somatic_vcf: [meta(+caller from consensus), vcf, tbi] + } + else if (params.somatic_var_keep == ['clair']) { + clairsto_somatic_ch + .set{somatic_vcf} + } + else if (params.somatic_var_keep == ['deepsomatic']) { + deepsomatic_ch + .set{somatic_vcf} + } + + // Strip 'caller' from meta before emitting both VCFs + somatic_vcf + .map{ meta, vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{somatic_vcf} + + germline_vcf + .map{ meta, vcf, tbi -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, vcf, tbi] + } + .set{germline_vcf} + + emit: + somatic_vcf // [meta, vcf, tbi] -- final somatic VCF (ClairS-TO, DeepSomatic, or consensus) + germline_vcf // [meta, vcf, tbi] -- final germline VCF (ClairS-TO germline, DeepVariant, or consensus) + + +} diff --git a/subworkflows/local/tumor_only_happhase.nf b/subworkflows/local/tumor_only_happhase.nf deleted file mode 100644 index 24095c56..00000000 --- a/subworkflows/local/tumor_only_happhase.nf +++ /dev/null @@ -1,169 +0,0 @@ -include { CLAIRSTO } from '../../modules/local/clairsto/main.nf' -include { VCFSPLIT } from '../../modules/local/vcfsplit/main.nf' -include { LONGPHASE_PHASE } from '../../modules/nf-core/longphase/phase/main' -include { LONGPHASE_HAPLOTAG } from 
'../../modules/nf-core/longphase/haplotag/main.nf' -include { SAMTOOLS_INDEX } from '../../modules/nf-core/samtools/index/main.nf' - -workflow TUMOR_ONLY_HAPPHASE { - - take: - tumor_bams - fasta - fai - dbsnp - colors - onekgenomes - gnomad - - main: - - ch_versions = channel.empty() - tumor_only_severus = channel.empty() - somatic_vep = channel.empty() - germline_vep = channel.empty() - - tumor_bams - .map{ meta, bam, bai -> - return [meta, bam, bai, meta.clairSTO_model] - } - .set{ tumor_bams } - // [meta, bam, bai, clairSTO_model] -- ClairS-TO model string appended for CLAIRSTO input - - // - // MODULE: CLAIRSTO - // - // call somatic/non-somatic variants - // (* not called as germline * just non-somatic) - - CLAIRSTO ( - tumor_bams, - fasta, - fai, - dbsnp, - colors, - onekgenomes, - gnomad - ) - - CLAIRSTO.out.indel_vcf - .join(CLAIRSTO.out.snv_vcf) - .set{ clairsto_vcf } - // [meta, indel_vcf, snv_vcf] -- raw ClairS-TO variant calls - - // - // MODULE: VCFSPLIT - // - // ClairSTO gives outputs in snv.vcf and indel.vcf - // reformats them to be in somatic.vcf and nonsomatic.vcf - - VCFSPLIT ( - clairsto_vcf - ) - - // Add the nonsomatic vcf info - // remove model info - tumor_bams - .join(VCFSPLIT.out.germline_vcf) - .map{ meta, bam, bai, _model, snps -> - def svs = [] - def mods = [] - return[meta, bam, bai, snps, svs, mods] - } - .set{ tumor_bams_germlinevcf } - // [meta, bam, bai, nonsomatic_vcf, [], []] -- non-somatic variants used for phasing; svs and mods are empty placeholders for LONGPHASE_PHASE input - - VCFSPLIT.out.somatic_vcf - .map { meta, vcf -> - def extra = [] - return [meta,vcf, extra] - } - .set { somatic_vep } - // [meta, somatic_vcf, []] -- PASS (somatic) variants for VEP annotation - - // - // MODULES: LONGPHASE_PHASE - // - // Phase tumor bams on nonsomatic vcf - LONGPHASE_PHASE ( - tumor_bams_germlinevcf, - fasta, - fai - ) - - ch_versions = ch_versions.mix(LONGPHASE_PHASE.out.versions) - - LONGPHASE_PHASE.out.snv_vcf - .map { meta, vcf 
-> - def extra = [] - return [meta,vcf, extra] - } - .set { germline_vep } - // [meta, germline_vcf, []] -- non-somatic variants (relabelled PASS) for VEP annotation - - - // Add phased nonsomatic vcf info - // remove model info - tumor_bams - .join(LONGPHASE_PHASE.out.snv_vcf) - .map { meta, bam, bai, _model, vcf -> - def new_meta = meta + [type: "tumor"] - def svs = [] - def mods = [] - return [new_meta, bam, bai, vcf, svs, mods] - } - .set{ tumor_bams_phasedvcf } - // [meta+{type:"tumor"}, bam, bai, phased_nonsomatic_vcf, [], []] -- type added; svs and mods are empty placeholders for LONGPHASE_HAPLOTAG - - // - // MODULES: LONGPHASE_HAPLOTAG - // - // Haplotag the tumor bams - - LONGPHASE_HAPLOTAG ( - tumor_bams_phasedvcf, - fasta, - fai - ) - - ch_versions = ch_versions.mix(LONGPHASE_HAPLOTAG.out.versions) - - // grab phased bams - LONGPHASE_HAPLOTAG.out.bam - .set{ haplotagged_bams } - // [meta+{type:"tumor"}, haplotagged_bam] - - // - // MODULES: SAMTOOLS_INDEX - // - // index the haplotagged bams - SAMTOOLS_INDEX ( - haplotagged_bams - ) - - // join information and the phased VCF file - haplotagged_bams - .join(SAMTOOLS_INDEX.out.bai) - .join(LONGPHASE_PHASE.out.snv_vcf) - .join(LONGPHASE_PHASE.out.snv_vcf_index) - .map{ meta, hap_bam, hap_bai, vcf, tbi -> - def new_meta = [id: meta.id, - paired_data: meta.paired_data, - platform: meta.platform, - sex: meta.sex, - fiber: meta.fiber, - clair3_model: meta.clair3_model, - clairS_model: meta.clairS_model, - clairSTO_model: meta.clairSTO_model, - kinetics: meta.kinetics] - return [new_meta, hap_bam, hap_bai, [], [], vcf, tbi] - } - .set{ tumor_only_severus } - // [meta, hap_bam, hap_bai, [], [], phased_vcf, phased_tbi] -- normal_bam and normal_bai are [] (tumor-only mode) - - emit: - tumor_only_severus - somatic_vep - germline_vep - versions = ch_versions - -} diff --git a/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf b/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf index 
94d8789b..34db6e1f 100644 --- a/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_lrsomatic_pipeline/main.nf @@ -84,17 +84,23 @@ workflow PIPELINE_INITIALISATION { // Create channel from input file provided through params.input // + // Parse the input samplesheet CSV and build a per-sample BAM channel + // Each samplesheet row describes one tumor (+ optional normal) sample + // Columns: sample_id, bam_tumor, bam_normal, method, sex, fiber, + // clair3_model, clairSTO_model, clairS_model, tumor_replicate, normal_replicate channel .fromList(samplesheetToList(params.input, "${projectDir}/assets/schema_input.json")) + // Step 1: build a combined meta map from the samplesheet columns + // paired_data = true if a normal BAM is present; false for tumor-only .map { meta, bam_tumor, bam_normal, method, sex, fiber, clair3_model, clairSTO_model, clairS_model, tumor_replicate, normal_replicate -> def real_clair3_model = (clair3_model == null ) ? null : clair3_model def real_clairS_model = (clairS_model == null ) ? null : clairS_model def real_clairSTO_model = (clairSTO_model == null ) ? null : clairSTO_model def paired_data = bam_normal ? 
true : false def meta_info = meta + [ paired_data: paired_data, - platform: method, - sex: sex, - fiber: fiber, + platform: method, // 'ont' or 'pb' + sex: sex, // 'XX', 'XY', or null (for ASCAT) + fiber: fiber, // 'y' or 'n' (fiber-seq data flag) clair3_model: real_clair3_model, clairS_model: real_clairS_model, clairSTO_model: real_clairSTO_model, @@ -102,9 +108,13 @@ workflow PIPELINE_INITIALISATION { normal_replicate: normal_replicate] return [ meta_info, [ bam_tumor ], [ bam_normal ?: [] ] ] } + // Flatten BAM lists (handles multi-run entries where bam_tumor/bam_normal are lists) .map { meta, bam_tumor, bam_normal -> [ meta, bam_tumor.flatten(), bam_normal.flatten() ] } + // Step 2: split each row into separate tumor and normal items + // flatMap emits 1 item (tumor-only) or 2 items (tumor + normal) per samplesheet row + // Each item gets type='tumor' or type='normal' and the appropriate replicate ID .flatMap { meta, tumor_bam, normal_bam -> def meta_tumor = meta.clone() meta_tumor.type = 'tumor' @@ -120,6 +130,7 @@ workflow PIPELINE_INITIALISATION { 'clairSTO_model', 'replicate') def result = [[meta_tumor, tumor_bam]] + // result so far: [[meta_tumor, [tumor_bam_path...]]] if (normal_bam) { def meta_normal = meta.clone() @@ -136,17 +147,24 @@ workflow PIPELINE_INITIALISATION { 'clairSTO_model', 'replicate') result << [meta_normal, normal_bam] + // result now: [[meta_tumor, [tumor_bams]], [meta_normal, [normal_bams]]] } return result } .set { ch_samplesheet } - - // ch_samplesheet -> meta: [id, paired_data, platform, sex, type] - // bam: unaligned bams + // ch_samplesheet: [meta, [bam...]] + // meta fields: id, paired_data, type ('tumor'|'normal'), platform ('ont'|'pb'), + // sex, fiber ('y'|'n'), clair3_model, clairS_model, clairSTO_model, replicate + // paired_data: true for both items in a T/N pair (same value for tumor AND normal rows) + // bam: list of paths (multiple runs for same sample remain as a list until SAMTOOLS_CAT) + // + // NOTE: tumor-only rows 
emit ONE item (type='tumor', paired_data=false) + // paired rows emit TWO items — tumor (paired_data=true) + normal (paired_data=true) + // Both share the same 'id' to allow downstream joins emit: - samplesheet = ch_samplesheet + samplesheet = ch_samplesheet // [meta, [bam...]] -- see channel structure above versions = ch_versions } diff --git a/subworkflows/nf-core/deepvariant/README.md b/subworkflows/nf-core/deepvariant/README.md new file mode 100644 index 00000000..6f816c22 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/README.md @@ -0,0 +1,8 @@ +# DeepVariant subworkflow + +Usage: the input channel should contain tuples of three elements: `meta`, an alignment file in bam or +cram format, and a corresponding index. + +It is very important that the input channel's `meta` is unique for all the input elements, because the subworkflow does a join on `meta`. + +Please note the important configuration items listed in the `deepvariant` module's README file. It is required to use the configuration to specify the input "channels" (data types to extract from bam file) for `DEEPVARIANT_MAKEEXAMPLES`, and the model to run for `DEEPVARIANT_CALLVARIANTS`. The correct arguments for a specific model (data type) can be determined by manually using the `run_deepvariant` command from the Docker / Singularity image with the `--dry_run` option. 
diff --git a/subworkflows/nf-core/deepvariant/deepvariant.diff b/subworkflows/nf-core/deepvariant/deepvariant.diff new file mode 100644 index 00000000..691bc284 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/deepvariant.diff @@ -0,0 +1,23 @@ +Changes in component 'nf-core/deepvariant' +'subworkflows/nf-core/deepvariant/README.md' is unchanged +'subworkflows/nf-core/deepvariant/meta.yml' is unchanged +Changes in 'deepvariant/main.nf': +--- subworkflows/nf-core/deepvariant/main.nf ++++ subworkflows/nf-core/deepvariant/main.nf +@@ -6,7 +6,7 @@ + take: + ch_input // channel: [ val(meta), path(input), path(index), path(intervals)] + ch_fasta // channel: [ val(meta2), path(fasta) ] +- ch_fai // channel: [ val(meta3), path(fail) ] ++ ch_fai // channel: [ val(meta3), path(fai) ] + ch_gzi // channel: [ val(meta4), path(gzi) ] + ch_par_bed // channel: [ val(meta5), path(par_bed) ] + + +'subworkflows/nf-core/deepvariant/tests/main.nf.test' is unchanged +'subworkflows/nf-core/deepvariant/tests/equality.nf.test' is unchanged +'subworkflows/nf-core/deepvariant/tests/disable-small-model.conf' is unchanged +'subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf' is unchanged +'subworkflows/nf-core/deepvariant/tests/main.nf.test.snap' is unchanged +'subworkflows/nf-core/deepvariant/tests/nextflow.config' is unchanged +************************************************************ diff --git a/subworkflows/nf-core/deepvariant/main.nf b/subworkflows/nf-core/deepvariant/main.nf new file mode 100644 index 00000000..0b358ff7 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/main.nf @@ -0,0 +1,46 @@ +include { DEEPVARIANT_MAKEEXAMPLES } from '../../../modules/nf-core/deepvariant/makeexamples/main' +include { DEEPVARIANT_CALLVARIANTS } from '../../../modules/nf-core/deepvariant/callvariants/main' +include { DEEPVARIANT_POSTPROCESSVARIANTS } from '../../../modules/nf-core/deepvariant/postprocessvariants/main' + +workflow DEEPVARIANT { + take: + 
ch_input // channel: [ val(meta), path(input), path(index), path(intervals)] + ch_fasta // channel: [ val(meta2), path(fasta) ] + ch_fai // channel: [ val(meta3), path(fai) ] + ch_gzi // channel: [ val(meta4), path(gzi) ] + ch_par_bed // channel: [ val(meta5), path(par_bed) ] + + main: + + DEEPVARIANT_MAKEEXAMPLES(ch_input, ch_fasta, ch_fai, ch_gzi, ch_par_bed) + + DEEPVARIANT_CALLVARIANTS(DEEPVARIANT_MAKEEXAMPLES.out.examples) + + // Input to postprocessing step needs both the gvcfs from MAKEEXAMPLES and the variant + // calls from CALLVARIANTS. Joining on meta, which is assumed to be unique. + ch_intervals = ch_input.map { meta, _input, _index, intervals -> [ meta, intervals ] } + + ch_postproc_input = DEEPVARIANT_CALLVARIANTS.out.call_variants_tfrecords.join( + DEEPVARIANT_MAKEEXAMPLES.out.gvcf, + failOnMismatch: true + ).join( + DEEPVARIANT_MAKEEXAMPLES.out.small_model_calls, + failOnMismatch: true + ).join( + ch_intervals, + failOnMismatch: true + ) + + DEEPVARIANT_POSTPROCESSVARIANTS( + ch_postproc_input, + ch_fasta, + ch_fai, + ch_gzi + ) + + emit: + vcf = DEEPVARIANT_POSTPROCESSVARIANTS.out.vcf + vcf_index = DEEPVARIANT_POSTPROCESSVARIANTS.out.vcf_index + gvcf = DEEPVARIANT_POSTPROCESSVARIANTS.out.gvcf + gvcf_index = DEEPVARIANT_POSTPROCESSVARIANTS.out.gvcf_index +} diff --git a/subworkflows/nf-core/deepvariant/meta.yml b/subworkflows/nf-core/deepvariant/meta.yml new file mode 100644 index 00000000..bd459a62 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/meta.yml @@ -0,0 +1,77 @@ +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json +name: deepvariant +description: DeepVariant is an analysis pipeline that uses a deep neural network to call genetic variants from next-generation DNA sequencing data +keywords: + - variant calling + - machine learning + - neural network +components: + - deepvariant/makeexamples + - deepvariant/callvariants + - deepvariant/postprocessvariants +input: + - 
ch_input: + type: list + description: | + Input aligned reads in bam or cram format, with index, and optional intervals BED file + Structure: [ val(meta), path(bam_or_cram), path(bai_or_crai), path(intervals_bed) ] + - ch_fasta: + type: file + description: | + Reference genome + Structure: [ val(meta2), path(fasta) ] + - ch_fai: + type: string + description: | + Reference genome index in fai format + Structure: [ val(meta3), path(fai) ] + - ch_gzi: + type: string + description: | + Reference genome index in gzi format (either gzi or fai should be used) + Structure: [ val(meta4), path(gzi) ] + - ch_par_bed: + type: string + description: | + bed file of pseudoautosomal regions (optional) + Structure: [ val(meta5), path(par_bed) ] + pattern: "*.bed" +output: + - vcf: + type: file + description: | + Variant calls + Structure: [ val(meta), path(vcf) ] + pattern: "*.vcf.gz" + - vcf_index: + type: file + description: | + Index for variant call file + Structure: [ val(meta), path(vcf_index) ] + pattern: "*.tbi" + - gvcf: + type: file + description: | + Variant call file with genomic coverage information + Structure: [ val(meta), path(gvcf) ] + pattern: "*.g.vcf.gz" + - gvcf_index: + type: file + description: | + Index for the GVCF. 
+ Structure: [ val(meta), path(gvcf_index) ] + pattern: "*.tbi" + - versions: + type: file + description: | + File containing software versions + Structure: path(versions.yml) + pattern: "versions.yml" +authors: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" +maintainers: + - "@abhi18av" + - "@ramprasadn" + - "@fa2k" diff --git a/subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf b/subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf new file mode 100644 index 00000000..83a16d55 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/deepvariant-workflow-and-process-equality-tester.nf @@ -0,0 +1,22 @@ +include { DEEPVARIANT_RUNDEEPVARIANT } from '../../../../modules/nf-core/deepvariant/rundeepvariant/main' +include { DEEPVARIANT } from '../main' + +workflow DEEPVARIANT_WORKFLOW_AND_PROCESS_EQUALITY_TESTER { + take: + ch_input // channel: [ val(meta), path(input), path(index), path(intervals)] + ch_fasta // channel: [ val(meta2), path(fasta) ] + ch_fai // channel: [ val(meta3), path(fai) ] + ch_gzi // channel: [ val(meta4), path(gzi) ] + ch_par_bed // channel: [ val(meta5), path(par_bed) ] + + main: + + DEEPVARIANT(ch_input, ch_fasta, ch_fai, ch_gzi, ch_par_bed) + DEEPVARIANT_RUNDEEPVARIANT(ch_input, ch_fasta, ch_fai, ch_gzi, ch_par_bed) + + emit: + wf_vcf = DEEPVARIANT.out.vcf + pc_vcf = DEEPVARIANT_RUNDEEPVARIANT.out.vcf + wf_gvcf = DEEPVARIANT.out.gvcf + pc_gvcf = DEEPVARIANT_RUNDEEPVARIANT.out.gvcf +} diff --git a/subworkflows/nf-core/deepvariant/tests/disable-small-model.conf b/subworkflows/nf-core/deepvariant/tests/disable-small-model.conf new file mode 100644 index 00000000..eb1b53bc --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/disable-small-model.conf @@ -0,0 +1,8 @@ +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs"' + } + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + } +} diff 
--git a/subworkflows/nf-core/deepvariant/tests/equality.nf.test b/subworkflows/nf-core/deepvariant/tests/equality.nf.test new file mode 100644 index 00000000..c4a2276e --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/equality.nf.test @@ -0,0 +1,63 @@ + +nextflow_workflow { + + name "Compare subworkflow DEEPVARIANT to the process DEEPVARIANT_RUNDEEPVARIANT" + script "./deepvariant-workflow-and-process-equality-tester.nf" + config "./nextflow.config" + workflow "DEEPVARIANT_WORKFLOW_AND_PROCESS_EQUALITY_TESTER" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/deepvariant" + + tag "deepvariant" + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant/postprocessvariants" + tag "deepvariant/rundeepvariant" + + test("ensure that the subworkflow and DEEPVARIANT_RUNDEEPVARIANT have the same output") { + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + 
file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert path(workflow.out.wf_vcf[0][1]).vcf.variantsMD5 == path(workflow.out.pc_vcf[0][1]).vcf.variantsMD5 }, + { assert path(workflow.out.wf_gvcf[0][1]).vcf.variantsMD5 == path(workflow.out.pc_gvcf[0][1]).vcf.variantsMD5 }, + ) + } + } +} diff --git a/subworkflows/nf-core/deepvariant/tests/main.nf.test b/subworkflows/nf-core/deepvariant/tests/main.nf.test new file mode 100644 index 00000000..d2451980 --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/main.nf.test @@ -0,0 +1,152 @@ +nextflow_workflow { + + name "Test Subworkflow DEEPVARIANT" + script "../main.nf" + config "./nextflow.config" + workflow "DEEPVARIANT" + + tag "subworkflows" + tag "subworkflows_nfcore" + tag "subworkflows/deepvariant" + + tag "deepvariant" + tag "deepvariant/makeexamples" + tag "deepvariant/callvariants" + tag "deepvariant/postprocessvariants" + + test("homo_sapiens - two inputs - bam - fasta - fai") { + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test.paired_end.sorted.bam.bai', checkIfExists: true), + [] + ]) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + 
] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("homo_sapiens - different samples and regions - cram - fasta - fai") { + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } + + test("homo_sapiens - disable small model - cram - fasta - fai") { + + config "./disable-small-model.conf" + + when { + workflow { + """ + input[0] = Channel.of( + [ + [ id:'test', 
single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/cram/test.paired_end.sorted.cram.crai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ], + [ + [ id:'test2', single_end:false ], // meta map + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/illumina/bam/test2.paired_end.sorted.bam.bai', checkIfExists: true), + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.bed', checkIfExists: true) + ] + ) + input[1] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) + ] + input[2] = [ + [ id:'genome'], + file(params.modules_testdata_base_path + '/genomics/homo_sapiens/genome/genome.fasta.fai', checkIfExists: true) + ] + input[3] = [ + [],[] + ] + input[4] = [ + [],[] + ] + """ + } + } + + + then { + assertAll( + { assert workflow.success }, + { assert snapshot(workflow.out).match() } + ) + } + } +} diff --git a/subworkflows/nf-core/deepvariant/tests/main.nf.test.snap b/subworkflows/nf-core/deepvariant/tests/main.nf.test.snap new file mode 100644 index 00000000..2d14299a --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/main.nf.test.snap @@ -0,0 +1,419 @@ +{ + "homo_sapiens - disable small model - cram - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,0c57956b2f5a0cff8d09a19790ef94f6" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,5f3d98908d46297c7a658654d5bb3015" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + 
"test.vcf.gz.tbi:md5,9ae649fed4de493a027697b339bfab36" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,d6a114149024aa8cd74dda2f1c559f5b" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,021f94de713efa7c83d0547f81412dbf" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,86f9c844a90351483c715e7bcc604841" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,3c690275c3d0b55bacb9469199b4d6d8" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,c65395b29f520cf2af04f211f9be2b36" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,021f94de713efa7c83d0547f81412dbf" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,86f9c844a90351483c715e7bcc604841" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,3c690275c3d0b55bacb9469199b4d6d8" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,c65395b29f520cf2af04f211f9be2b36" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,0c57956b2f5a0cff8d09a19790ef94f6" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,5f3d98908d46297c7a658654d5bb3015" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,9ae649fed4de493a027697b339bfab36" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,d6a114149024aa8cd74dda2f1c559f5b" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-29T00:09:09.621357638" + }, + "homo_sapiens - different samples and regions - cram - fasta - fai": { + "content": [ + { + "0": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + 
"single_end": false + }, + "test2.vcf.gz:md5,3176f86df96e50687db733c94d9c6689" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,ed7ca1a16bcff42bced0be77ee70662e" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,875b521c835441277a527d41c950e4f5" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,ca6f9ca8d50d339f5d65e4ec4e9a6ea6" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,875b521c835441277a527d41c950e4f5" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,ca6f9ca8d50d339f5d65e4ec4e9a6ea6" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,3176f86df96e50687db733c94d9c6689" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,ed7ca1a16bcff42bced0be77ee70662e" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-01-29T00:05:25.205895104" + }, + "homo_sapiens - two inputs - bam - fasta - fai": { + "content": [ + { + "0": [ + [ + 
{ + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "1": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ], + "2": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "3": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "gvcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz:md5,89b2e47883a65bb9cae8f173e782bb17" + ] + ], + "gvcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.g.vcf.gz.tbi:md5,1680c67fe988bc1d8220fbb4127c2c18" + ] + ], + "vcf": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz:md5,707212230030c8c3efbe5c2e0428da03" + ] + ], + "vcf_index": [ + [ + { + "id": "test", + "single_end": false + }, + "test.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ], + [ + { + "id": "test2", + "single_end": false + }, + "test2.vcf.gz.tbi:md5,248648ca03f5fda904ebbef8821e0e37" + ] + ] + } + ], + "meta": { + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + 
"timestamp": "2026-01-29T00:01:12.430387646" + } +} \ No newline at end of file diff --git a/subworkflows/nf-core/deepvariant/tests/nextflow.config b/subworkflows/nf-core/deepvariant/tests/nextflow.config new file mode 100644 index 00000000..ad76d4ec --- /dev/null +++ b/subworkflows/nf-core/deepvariant/tests/nextflow.config @@ -0,0 +1,14 @@ +process { + withName: "DEEPVARIANT_MAKEEXAMPLES" { + ext.args = '--checkpoint "/opt/models/wgs" --call_small_model_examples --small_model_indel_gq_threshold "30" --small_model_snp_gq_threshold "25" --small_model_vaf_context_window_size "51" --trained_small_model_path "/opt/smallmodels/wgs"' + } + withName: "DEEPVARIANT_CALLVARIANTS" { + ext.args = '--checkpoint "/opt/models/wgs"' + } + + // This configures RUNDEEPVARIANT, which is used as a reference for the correct test output + withName: DEEPVARIANT_RUNDEEPVARIANT { + ext.args = '--model_type=WGS ' + ext.prefix = { "${meta.id}_out" } + } +} diff --git a/tests/.nftignore b/tests/.nftignore index 7a9121a9..a1de7635 100644 --- a/tests/.nftignore +++ b/tests/.nftignore @@ -25,3 +25,5 @@ pipeline_info/*.{html,json,txt,yml} */qc/{tumor,normal}/nanoplot_ubam_*/*.txt */qc/{tumor,normal}/nanoplot_aln/*.txt */qc/{tumor,normal}/mosdepth/*.txt +*/variants/deepsomatic/*.{vcf.gz,vcf.gz.tbi} +*/variants/deepvariant/*.{vcf.gz,vcf.gz.tbi} diff --git a/tests/default.nf.test b/tests/default.nf.test index 160dc01e..a7a4281c 100644 --- a/tests/default.nf.test +++ b/tests/default.nf.test @@ -42,6 +42,19 @@ nextflow_pipeline { assert file("$launchDir/output/sample3/variants/clairsto/snv.vcf.gz").exists() assert file("$launchDir/output/sample3/variants/clairsto/somatic.vcf.gz").exists() assert file("$launchDir/output/sample3/variants/clairsto/germline.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/deepvariant/sample1.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/deepvariant/sample1.vcf.gz.tbi").exists() + assert 
file("$launchDir/output/sample2/variants/deepvariant/sample2.vcf.gz").exists() + assert file("$launchDir/output/sample2/variants/deepvariant/sample2.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample3/variants/deepvariant/sample3.vcf.gz").exists() + assert file("$launchDir/output/sample3/variants/deepvariant/sample3.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample1/variants/deepsomatic/sample1.vcf.gz").exists() + assert file("$launchDir/output/sample1/variants/deepsomatic/sample1.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample2/variants/deepsomatic/sample2.vcf.gz").exists() + assert file("$launchDir/output/sample2/variants/deepsomatic/sample2.vcf.gz.tbi").exists() + assert file("$launchDir/output/sample3/variants/deepsomatic/sample3.vcf.gz").exists() + assert file("$launchDir/output/sample3/variants/deepsomatic/sample3.vcf.gz.tbi").exists() + }, { assert snapshot( // pipeline versions.yml file for multiqc from which Nextflow version is removed because we test pipelines on multiple Nextflow versions diff --git a/tests/default.nf.test.snap b/tests/default.nf.test.snap index 17d39965..fcdc165a 100644 --- a/tests/default.nf.test.snap +++ b/tests/default.nf.test.snap @@ -2,9 +2,21 @@ "-profile test": { "content": [ { + "BCFTOOLS_ANNOTATE": { + "bcftools": 1.22 + }, "BCFTOOLS_CONCAT": { "bcftools": 1.22 }, + "BCFTOOLS_ISEC": { + "bcftools": 1.22 + }, + "BCFTOOLS_NORM": { + "bcftools": 1.22 + }, + "BCFTOOLS_QUERY": { + "bcftools": 1.22 + }, "BCFTOOLS_SORT": { "bcftools": 1.22 }, @@ -23,17 +35,37 @@ "CRAMINO_PRE": { "cramino": "1.3.0" }, + "DEEPSOMATIC_CALLVARIANTS": { + "deepsomatic": "1.7.0" + }, + "DEEPSOMATIC_MAKEEXAMPLES": { + "deepsomatic": "1.7.0" + }, + "DEEPSOMATIC_POSTPROCESSVARIANTS": { + "deepsomatic": "1.7.0" + }, + "DEEPVARIANT_CALLVARIANTS": { + "deepvariant": "1.9.0" + }, + "DEEPVARIANT_MAKEEXAMPLES": { + "deepvariant": "1.9.0" + }, + "DEEPVARIANT_POSTPROCESSVARIANTS": { + "deepvariant": "1.9.0" + }, "GERMLINE_VEP": { 
"ensemblvep": 115.2, "perl-math-cdf": 0.1, "tabix": 1.21 }, "LONGPHASE_HAPLOTAG": { - "longphase": 2.0 + "longphase": "2.0.1" + }, + "LONGPHASE_PHASE_GERMLINE": { + "longphase": "2.0.1" }, - "LONGPHASE_PHASE": { - "longphase": 2.0, - "tabix": "1.22.1" + "LONGPHASE_PHASE_SOMATIC": { + "longphase": "2.0.1" }, "METAEXTRACT": { "samtools": 1.21 @@ -257,17 +289,33 @@ "sample1/variants/clairs/indel.vcf.gz.tbi", "sample1/variants/clairs/snvs.vcf.gz", "sample1/variants/clairs/snvs.vcf.gz.tbi", + "sample1/variants/deepsomatic", + "sample1/variants/deepsomatic/sample1.vcf.gz", + "sample1/variants/deepsomatic/sample1.vcf.gz.tbi", + "sample1/variants/deepvariant", + "sample1/variants/deepvariant/sample1.vcf.gz", + "sample1/variants/deepvariant/sample1.vcf.gz.tbi", "sample1/variants/phased", "sample1/variants/phased/germline_smallvariants.vcf.gz", "sample1/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample1/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample1/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", + "sample1/variants/phased/somatic_smallvariants.vcf.gz", + "sample1/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample1/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample1/variants/severus", "sample1/variants/severus/all_SVs", + "sample1/variants/severus/all_SVs/breakpoint_clusters.tsv", + "sample1/variants/severus/all_SVs/breakpoint_clusters_list.tsv", "sample1/variants/severus/all_SVs/severus_all.vcf.gz", "sample1/variants/severus/breakpoints_double.csv", "sample1/variants/severus/read_ids.csv", "sample1/variants/severus/read_qual.txt", "sample1/variants/severus/severus.log", "sample1/variants/severus/somatic_SVs", + "sample1/variants/severus/somatic_SVs/breakpoint_clusters.tsv", + "sample1/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", "sample1/variants/severus/somatic_SVs/severus_somatic.vcf.gz", "sample1/vep", "sample1/vep/SVs", @@ -364,17 
+412,33 @@ "sample2/variants/clairs/indel.vcf.gz.tbi", "sample2/variants/clairs/snvs.vcf.gz", "sample2/variants/clairs/snvs.vcf.gz.tbi", + "sample2/variants/deepsomatic", + "sample2/variants/deepsomatic/sample2.vcf.gz", + "sample2/variants/deepsomatic/sample2.vcf.gz.tbi", + "sample2/variants/deepvariant", + "sample2/variants/deepvariant/sample2.vcf.gz", + "sample2/variants/deepvariant/sample2.vcf.gz.tbi", "sample2/variants/phased", "sample2/variants/phased/germline_smallvariants.vcf.gz", "sample2/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample2/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample2/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", + "sample2/variants/phased/somatic_smallvariants.vcf.gz", + "sample2/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample2/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample2/variants/severus", "sample2/variants/severus/all_SVs", + "sample2/variants/severus/all_SVs/breakpoint_clusters.tsv", + "sample2/variants/severus/all_SVs/breakpoint_clusters_list.tsv", "sample2/variants/severus/all_SVs/severus_all.vcf.gz", "sample2/variants/severus/breakpoints_double.csv", "sample2/variants/severus/read_ids.csv", "sample2/variants/severus/read_qual.txt", "sample2/variants/severus/severus.log", "sample2/variants/severus/somatic_SVs", + "sample2/variants/severus/somatic_SVs/breakpoint_clusters.tsv", + "sample2/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", "sample2/variants/severus/somatic_SVs/severus_somatic.vcf.gz", "sample2/vep", "sample2/vep/SVs", @@ -439,17 +503,33 @@ "sample3/variants/clairsto/snv.vcf.gz.tbi", "sample3/variants/clairsto/somatic.vcf.gz", "sample3/variants/clairsto/somatic.vcf.gz.tbi", + "sample3/variants/deepsomatic", + "sample3/variants/deepsomatic/sample3.vcf.gz", + "sample3/variants/deepsomatic/sample3.vcf.gz.tbi", + "sample3/variants/deepvariant", + 
"sample3/variants/deepvariant/sample3.vcf.gz", + "sample3/variants/deepvariant/sample3.vcf.gz.tbi", "sample3/variants/phased", "sample3/variants/phased/germline_smallvariants.vcf.gz", "sample3/variants/phased/germline_smallvariants.vcf.gz.tbi", + "sample3/variants/phased/germline_smallvariants_mod.vcf.gz", + "sample3/variants/phased/germline_smallvariants_mod.vcf.gz.tbi", + "sample3/variants/phased/somatic_smallvariants.vcf.gz", + "sample3/variants/phased/somatic_smallvariants.vcf.gz.tbi", + "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz", + "sample3/variants/phased/somatic_smallvariants_mod.vcf.gz.tbi", "sample3/variants/severus", "sample3/variants/severus/all_SVs", + "sample3/variants/severus/all_SVs/breakpoint_clusters.tsv", + "sample3/variants/severus/all_SVs/breakpoint_clusters_list.tsv", "sample3/variants/severus/all_SVs/severus_all.vcf.gz", "sample3/variants/severus/breakpoints_double.csv", "sample3/variants/severus/read_ids.csv", "sample3/variants/severus/read_qual.txt", "sample3/variants/severus/severus.log", "sample3/variants/severus/somatic_SVs", + "sample3/variants/severus/somatic_SVs/breakpoint_clusters.tsv", + "sample3/variants/severus/somatic_SVs/breakpoint_clusters_list.tsv", "sample3/variants/severus/somatic_SVs/severus_somatic.vcf.gz", "sample3/vep", "sample3/vep/SVs", @@ -466,52 +546,64 @@ "sample3/vep/somatic/sample3_SOMATIC_VEP.vcf.gz_summary.html" ], [ - "sample1_normal.bam:md5,c4eda86b77d9280bbcb922ab944ce68b", - "sample1_normal.bam.bai:md5,0d0e6451f37ddd209bc8804c50e29e85", - "sample1_tumor.bam:md5,57ad020a25e02fe94e9d6199c396aaf1", - "sample1_tumor.bam.bai:md5,28ffee78ed0ed13e0749e8d08bbb52c6", + "sample1_normal.bam:md5,3ce847c38eb619781e32a10c28e0c35c", + "sample1_normal.bam.bai:md5,8dd8c7fa037badc7097067d5a88672cd", + "sample1_tumor.bam:md5,ed5eb35b63d5e92fa8e461b9a1732b21", + "sample1_tumor.bam.bai:md5,21018d3f1f85be74fd7dc66873219b05", "sample1.flagstat:md5,1c41ea9923945501eb7e41f83a90502d", 
"sample1.idxstats:md5,902e503387799123ea59255e3fca172c", - "sample1.stats:md5,5a76f92088d36f8e93d72351e521b59b", + "sample1.stats:md5,70fabbdc07dec0479b3fc7dcec344054", "sample1.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample1.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", - "sample1.stats:md5,8cec99bd9c1ba4ee22619b66d4fec02a", - "sample1_whatshap_stats.gtf:md5,eff050a68e36e778b06e0ec19435c569", - "sample1_whatshap_stats.log:md5,76b73731f74fe32ef2d11f6bb0a0f71a", - "sample1_whatshap_stats.tsv:md5,f566ae25b3c5a8f7e94b3d6c1b0417f8", - "breakpoints_double.csv:md5,fd92fe40bc0ab3b836dedc395b80d6e2", - "read_qual.txt:md5,78247dfa2ea336eac0e128eba5e9eef4", - "sample2_normal.bam:md5,32cb1237503f716d788c8d49106d57cc", - "sample2_normal.bam.bai:md5,bda7beaa98b119d07bb61da781af2033", - "sample2_tumor.bam:md5,b9ec59ff00a6c561a614eeb7553c352c", - "sample2_tumor.bam.bai:md5,8342f0fbbc8ce4f4c79aa42c7804df9d", + "sample1.stats:md5,5012c82d3d3ca60ffdd2fb970f772566", + "sample1_whatshap_stats.gtf:md5,428ca0e0f48dc2e3e1b978fa7cf720f3", + "sample1_whatshap_stats.log:md5,5c1f0f79a60a6879b75271fa94b620e8", + "sample1_whatshap_stats.tsv:md5,98582c7e0ff74a2a1978bf70ac9926ee", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "breakpoints_double.csv:md5,27b409c73dd0d8bde316545f86ac7f15", + "read_qual.txt:md5,fbe6cd0b65cbfc1ca699e252e531ab72", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "sample2_normal.bam:md5,2ecddb6279310240f2dc29b0cf7f8c84", + "sample2_normal.bam.bai:md5,9caebccee8882bcf95e65631c0ac6730", + "sample2_tumor.bam:md5,c8db7cc4b189dec3fadd2cce07d9fcae", + "sample2_tumor.bam.bai:md5,28827d78aa318e26f71b68581ed5c607", "sample2.flagstat:md5,714d0cc0c213e2640e54a16f3d0e6e7e", "sample2.idxstats:md5,72eb83bb11748dc863fef1a0a5497e4b", - "sample2.stats:md5,9afe66549bed997cce97dfa11c0f0cca", + 
"sample2.stats:md5,87cb6e9adf8a133244e8b331be43bb14", "sample2.flagstat:md5,4344a8745efef9cc2a017024218d61c6", "sample2.idxstats:md5,69467fc02c83a30084736aeea8b785fb", - "sample2.stats:md5,7cbbff1faaf2e030470a8c1e69434b48", - "sample2_whatshap_stats.gtf:md5,4d8f4393e3aebe4e945c0b8236cf3b3e", - "sample2_whatshap_stats.log:md5,10bba7bae6dd99b989ece5e5dac7a8f9", - "sample2_whatshap_stats.tsv:md5,bb46226e486af9026ab76e014624e903", - "breakpoints_double.csv:md5,d3f0957887406fb79f9dcc3707324d8c", - "read_qual.txt:md5,8b92ff7dc4536188be159b95525511cd", - "sample3_tumor.bam:md5,72e067ec99d694f5e1549edca7196054", - "sample3_tumor.bam.bai:md5,4c7386eb59528e81bd48e6a77952f1c3", + "sample2.stats:md5,1e044857eeefb284fda88ee58ff7a04a", + "sample2_whatshap_stats.gtf:md5,a13f0ac1edd7abde4ad013bf2619fe0f", + "sample2_whatshap_stats.log:md5,1d5ed1faca328d3014e9b14a44d18a23", + "sample2_whatshap_stats.tsv:md5,a275209ef9e7885ee5ea3a4aa1c970fd", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "breakpoints_double.csv:md5,7d2fe02046bd2ff7138b46d8f67fc755", + "read_qual.txt:md5,fe3f87458d7c0c6591c37e1fd70cecf2", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "sample3_tumor.bam:md5,f5dfadf92345cd529de4c1919f82b632", + "sample3_tumor.bam.bai:md5,f8ca34c0584329c62cb70ce8fb942cb6", "sample3.flagstat:md5,8ff32d733c62c4910bf185ef24bf27cf", "sample3.idxstats:md5,2de140e61f9e86c9c10af20dd565cc93", - "sample3.stats:md5,6825d4e497aef80ed7160afbef5076d9", - "sample3_whatshap_stats.gtf:md5,46a97067376b06b476d180709bc9e3d8", - "sample3_whatshap_stats.log:md5,376254ec9c98f9ba204895e7085516ed", - "sample3_whatshap_stats.tsv:md5,f7cc79156f23e884ead18e50b8434dbf", - "breakpoints_double.csv:md5,41bb00e81dd6c319c13e754fa853ca68", - "read_qual.txt:md5,b918430d35354dad1d7f02f21e4cd4ed" + 
"sample3.stats:md5,d7a8552a8a41a217954a0c825d468a60", + "sample3_whatshap_stats.gtf:md5,d41d8cd98f00b204e9800998ecf8427e", + "sample3_whatshap_stats.log:md5,ca067293878d1760638626a8c5a31432", + "sample3_whatshap_stats.tsv:md5,62beceb9731cafc620ce5c6eb07a9cc9", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50", + "breakpoints_double.csv:md5,a9a0e0a75975904952788c2a0bd3fa85", + "read_qual.txt:md5,25efaa43bb81a4592bfb8f5f08f84b34", + "breakpoint_clusters.tsv:md5,d36a70de292ee130ef30da4a58bced18", + "breakpoint_clusters_list.tsv:md5,0c0ce62e329f8de492487e8414c30a50" ] ], - "timestamp": "2026-03-13T15:33:42.343920246", "meta": { - "nf-test": "0.9.4", - "nextflow": "25.10.0" - } + "nf-test": "0.9.3", + "nextflow": "25.10.2" + }, + "timestamp": "2026-03-27T17:04:12.049740619" } } \ No newline at end of file diff --git a/workflows/lrsomatic.nf b/workflows/lrsomatic.nf index bc0f4c9b..6cf4cad5 100644 --- a/workflows/lrsomatic.nf +++ b/workflows/lrsomatic.nf @@ -37,11 +37,14 @@ include { WHATSHAP_STATS } from '../modules/nf-core/whatshap/ // // IMPORT SUBWORKFLOWS // -include { PREPARE_REFERENCE_FILES } from '../subworkflows/local/prepare_reference_files' -include { PREPARE_ANNOTATION } from '../subworkflows/local/prepare_annotation' -include { BAM_STATS_SAMTOOLS } from '../subworkflows/nf-core/bam_stats_samtools/main' -include { TUMOR_NORMAL_HAPPHASE } from '../subworkflows/local/tumor_normal_happhase' -include { TUMOR_ONLY_HAPPHASE } from '../subworkflows/local/tumor_only_happhase' +include { PREPARE_REFERENCE_FILES } from '../subworkflows/local/prepare_reference_files' +include { PREPARE_ANNOTATION } from '../subworkflows/local/prepare_annotation' +include { BAM_STATS_SAMTOOLS } from '../subworkflows/nf-core/bam_stats_samtools/main' +include { TUMORONLY_SMALLVAR } from '../subworkflows/local/tumor_only/tumoronly_smallvar' +include { PAIRED_SMALLVAR_SOMATIC } from 
'../subworkflows/local/paired/paired_smallvar_somatic' +include { PAIRED_SMALLVAR_GERMLINE } from '../subworkflows/local/paired/paired_smallvar_germline' +include { PHASING_HAPLOTYPING } from '../subworkflows/local/phasing_haplotyping' + /* @@ -92,10 +95,50 @@ workflow LRSOMATIC { params.bed_file = getGenomeAttribute('bed_file') params.vep_genome = getGenomeAttribute('vep_genome') params.vep_species = getGenomeAttribute('vep_species') - params.dbsnp = getGenomeAttribute('dbsnp') - params.colors = getGenomeAttribute('colors') - params.onekgenomes = getGenomeAttribute('onekgenomes') - params.gnomad = getGenomeAttribute('gnomad') + + if (params.pon_vcfs != null) { + pon_files = params.pon_vcfs.collect { file(it) } + pon_flags = params.pon_flags + } + else if (params.genome == 'GRCh38') { + pon_files = [ + getGenomeAttribute('gnomad'), + getGenomeAttribute('dbsnp'), + getGenomeAttribute('onekgenomes'), + getGenomeAttribute('colors'), + ] + pon_flags = [ + "True", + "True", + "False", + "False" + ] + } + else if (params.genome == 'CHM13') { + pon_files = [ + getGenomeAttribute('gnomad'), + getGenomeAttribute('dbsnp'), + getGenomeAttribute('onekgenomes'), + getGenomeAttribute('colors'), + getGenomeAttribute('asap') + ] + pon_flags = [ + "True", + "True", + "False", + "False", + "False" + ] + } + if (pon_files.size() != pon_flags.size()) { + error "PoN VCFs and allele flags must have same length" + } + Channel + .of( tuple(pon_files, pon_flags) ) + .set { pon_channel } + // pon_channel: [ [pon_vcf_path, ...], [is_population_allele_flag, ...] ] + // -- single tuple of parallel lists; each flag indicates whether the corresponding VCF + // is a population allele database (True) vs. 
a panel-of-normals artefact file (False) ch_versions = channel.empty() ch_multiqc_files = channel.empty() @@ -105,11 +148,14 @@ workflow LRSOMATIC { // // extracts the base calling model from the bam files + // MODULE: METAEXTRACT (label: process_single) + // Input: [meta, [bam...]] METAEXTRACT( ch_samplesheet ) basecall_meta = METAEXTRACT.out.meta_ext - // [meta, basecall_model_str, kinetics_str] -- basecall model and kinetics extracted from BAM header - // Adds the base calling model to meta.basecall_model + // basecall_meta: [meta, basecall_model_str, kinetics_str] + // basecall_model_str -- e.g. "dna_r10.4.1_e8.2_400bps_sup@v5.0.0" or "hifi_revio" + // kinetics_str -- "true" if PacBio kinetics tags present, else "false" ch_samplesheet .join(basecall_meta) @@ -135,12 +181,20 @@ workflow LRSOMATIC { [ meta, bam.flatten()] } .set{ch_samplesheet} - // [meta_full, [bam...]] -- meta now includes: id, paired_data, type, platform, sex, fiber, clair3_model, clairS_model, clairSTO_model, kinetics - - + // ch_samplesheet (updated): [meta, [bam...]] + // meta fields: id, paired_data, type, platform, sex, fiber, replicate, + // clair3_model, clairS_model, clairSTO_model, kinetics + // bams are grouped per sample (multiple runs merged into a list) // // SUBWORKFLOW: PREPARE_REFERENCE_FILES + // Decompresses the reference FASTA if needed, indexes it, downloads Clair3 models, + // and decompresses ASCAT reference files + // Input: params.fasta, ASCAT file paths, basecall_meta, clair3_modelMap + // Output: .prepped_fasta -- [[:], fasta] + // .prepped_fai -- [[:], fai] + // .downloaded_clair3_models-- [meta(id=model_name), model_dir] + // .allele_files / .loci_files / .gc_file / .rt_file -- flat file collections // PREPARE_REFERENCE_FILES ( @@ -154,13 +208,16 @@ workflow LRSOMATIC { ) downloaded_clair3_models = PREPARE_REFERENCE_FILES.out.downloaded_clair3_models + // downloaded_clair3_models: [meta(id=clair3_model_name), model_dir] ch_nanoplot_pre_txt = channel.empty() if 
(!params.skip_qc && !params.skip_cramino) { // - // Module: CRAMINO + // MODULE: CRAMINO_PRE (label: process_medium) + // Input: [meta, [bam...]] -- pre-alignment unaligned BAMs + // Output: cramino_pre.out.arrow -- [meta, arrow_file] (feather format stats) // CRAMINO_PRE( ch_samplesheet ) @@ -168,7 +225,9 @@ workflow LRSOMATIC { if (!params.skip_nanoplot) { // - // Module: Nanoplot + // MODULE: NANOPLOT_PRE (label: process_medium) + // Input: CRAMINO_PRE.out.arrow -- [meta, arrow_file] + // Output: nanoplot HTML/txt reports // NANOPLOT_PRE(CRAMINO_PRE.out.arrow) @@ -177,6 +236,7 @@ workflow LRSOMATIC { } + // Drop 'replicate' from meta before concatenation -- replicate info not needed downstream ch_samplesheet .map{ meta, bam -> def new_meta = meta.subMap('id', @@ -192,42 +252,40 @@ workflow LRSOMATIC { return[new_meta, bam] } .set{ch_samplesheet_no_rep} + // ch_samplesheet_no_rep: [meta, [bam...]] + // meta fields: id, paired_data, type, platform, sex, fiber, + // clair3_model, clairS_model, clairSTO_model, kinetics + // (replicate field removed; bams still a list — concatenated next) - - // ch_samplesheet -> meta: [id, paired_data, platform, sex, type, fiber, basecall_model] - // bam: list of unaligned bams + // Branch on number of input BAMs: samples with a single BAM skip concatenation ch_split = ch_samplesheet_no_rep .branch { _meta, bam -> single: bam.size() == 1 multiple: bam.size() > 1 } + // ch_split.single: [meta, [bam]] -- pass-through, no concatenation needed + // ch_split.multiple: [meta, [bam...]] -- need SAMTOOLS_CAT to merge // - // MODULE: SAMTOOLS_CAT + // MODULE: SAMTOOLS_CAT (label: process_single) + // Input: [meta, [bam...]] -- multiple unaligned BAMs for same sample + // Output: .bam -- [meta, bam] -- single merged unaligned BAM // - // concatenates bam files from single sample SAMTOOLS_CAT ( ch_split.multiple ) .bam .mix ( ch_split.single ) .set { ch_cat_ubams } - // [meta, bam] -- single merged unaligned BAM per sample + // ch_cat_ubams: 
[meta, bam] -- single (possibly concatenated) unaligned BAM per sample vep_cache = channel.empty() if (!params.skip_vep) { - channel - .of([ - vep_cache: params.vep_cache, - vep_cache_version: params.vep_cache_version, - vep_genome: params.vep_genome, - vep_args: params.vep_args, - vep_species: params.vep_species, - download_vep_cache: params.download_vep_cache - ]) - + // SUBWORKFLOW: PREPARE_ANNOTATION + // Validates or downloads the VEP cache directory + // Output: .vep_cache -- path to VEP cache root directory PREPARE_ANNOTATION ( params.vep_cache, params.vep_cache_version, @@ -237,19 +295,21 @@ workflow LRSOMATIC { params.download_vep_cache ) ch_versions = ch_versions.mix(PREPARE_ANNOTATION.out.versions) + // Wrap VEP cache path in a tuple with empty meta for use in ENSEMBLVEP_VEP vep_cache = PREPARE_ANNOTATION.out.vep_cache.map {cache -> [[:], cache] } + // vep_cache: [[:], cache_dir_path] -- empty meta + VEP cache directory } ch_versions = ch_versions.mix(PREPARE_REFERENCE_FILES.out.versions) - ch_fasta = PREPARE_REFERENCE_FILES.out.prepped_fasta - ch_fai = PREPARE_REFERENCE_FILES.out.prepped_fai + ch_fasta = PREPARE_REFERENCE_FILES.out.prepped_fasta // [[:], fasta] + ch_fai = PREPARE_REFERENCE_FILES.out.prepped_fai // [[:], fai] - // ASCAT files - allele_files = PREPARE_REFERENCE_FILES.out.allele_files - loci_files = PREPARE_REFERENCE_FILES.out.loci_files - gc_file = PREPARE_REFERENCE_FILES.out.gc_file - rt_file = PREPARE_REFERENCE_FILES.out.rt_file + // ASCAT reference files -- flat path collections (no meta wrapper), passed directly to ASCAT module + allele_files = PREPARE_REFERENCE_FILES.out.allele_files // [path, ...] -- per-chromosome allele files + loci_files = PREPARE_REFERENCE_FILES.out.loci_files // [path, ...] -- per-chromosome loci files + gc_file = PREPARE_REFERENCE_FILES.out.gc_file // [path, ...] -- GC correction ([] if skipped) + rt_file = PREPARE_REFERENCE_FILES.out.rt_file // [path, ...] 
-- RT correction ([] if skipped) // // MODULE: FIBERTOOLSRS_PREDICTM6A @@ -257,39 +317,57 @@ workflow LRSOMATIC { // predict m6a in unaligned bam if (!params.skip_fiber) { + // Fiber-seq processing: predict m6A methylation, call nucleosomes and FIRE elements + // m6A prediction runs only for PacBio samples with kinetics; nucleosome/FIRE calling runs for any sample with meta.fiber == "y" if (!params.skip_normalfiber){ + // Process all samples (including normals) for fiber-seq ubams = ch_cat_ubams } else { + // Skip fiber-seq processing for normal samples; set aside normals to re-join later ch_cat_ubams .branch { meta, _bams -> normal: meta.type == "normal" tumor: meta.type == "tumor" } .set { ch_cat_ubams_normal_branching } + // ch_cat_ubams_normal_branching.normal: [meta, bam] -- normal samples (held out) + // ch_cat_ubams_normal_branching.tumor: [meta, bam] -- tumor samples only normal_bams = ch_cat_ubams_normal_branching.normal ubams = ch_cat_ubams_normal_branching.tumor } + // Branch by sequencing platform: PacBio needs m6A prediction, ONT does not ubams .branch{ meta, _bams -> pacBio: meta.platform == "pb" ont: meta.platform == "ont" } .set{ch_cat_ubams_pacbio_ont_branching} + // ch_cat_ubams_pacbio_ont_branching.pacBio: [meta, bam] -- PacBio samples + // ch_cat_ubams_pacbio_ont_branching.ont: [meta, bam] -- ONT samples (skip m6A) pacbio_bams = ch_cat_ubams_pacbio_ont_branching.pacBio + // Branch PacBio samples: only those with kinetics tags can have m6A predicted pacbio_bams .branch{meta, _bams -> kinetics: meta.kinetics == "true" noKinetics: meta.kinetics == "false" } .set{pacbio_bams} + // pacbio_bams.kinetics: [meta, bam] -- PacBio with kinetics tags (meta.kinetics == "true"); m6A predictable + // pacbio_bams.noKinetics: [meta, bam] -- PacBio without kinetics; skip PREDICTM6A if (!params.skip_m6a) { + // + // MODULE: FIBERTOOLSRS_PREDICTM6A (label: process_high) + // Input: [meta, bam] -- PacBio BAM with kinetics tags + // Output: .bam -- [meta, bam] -- BAM with m6A (MM/ML) tags added + // FIBERTOOLSRS_PREDICTM6A (
pacbio_bams.kinetics ) + // Merge PacBio with and without kinetics: both now have (or skip) m6A tags pacbio_bams.noKinetics .mix(FIBERTOOLSRS_PREDICTM6A.out.bam) .set{predicted_bams} @@ -299,22 +377,28 @@ workflow LRSOMATIC { .mix(pacbio_bams.kinetics) .set{predicted_bams} } + // predicted_bams: [meta, bam] -- all PacBio samples (m6A tags present where applicable) - - + // Re-merge ONT and PacBio before fiber-seq branching ch_cat_ubams_pacbio_ont_branching.ont .mix(predicted_bams) .set{fiber_branch} + // fiber_branch (pre-split): [meta, bam] -- all samples (ONT + PacBio, with m6A if applicable) + // Branch on fiber-seq flag: only fiber-seq samples get nucleosome/FIRE calling fiber_branch .branch{ meta, _bams -> fiber: meta.fiber == "y" nonFiber: meta.fiber == "n" } .set{fiber_branch} + // fiber_branch.fiber: [meta, bam] -- fiber-seq samples → nucleosome + FIRE calling + // fiber_branch.nonFiber: [meta, bam] -- non-fiber samples → passed through unchanged // - // MODULE: FIBERTOOLSRS_NUCLEOSOMES + // MODULE: FIBERTOOLSRS_NUCLEOSOMES (label: process_high) + // Input: [meta, bam] -- fiber-seq BAM (with m6A tags for PacBio) + // Output: .bam -- [meta, bam] -- BAM with nucleosome footprint tags added // FIBERTOOLSRS_NUCLEOSOMES ( @@ -322,7 +406,9 @@ workflow LRSOMATIC { ) // - // MODULE: FIBERTOOLSRS_FIRE + // MODULE: FIBERTOOLSRS_FIRE (label: process_high) + // Input: FIBERTOOLSRS_NUCLEOSOMES.out.bam -- [meta, bam] -- BAM with nucleosome tags + // Output: .bam -- [meta, bam] -- BAM with FIRE (Fiber-seq Inferred Regulatory Elements) tags // FIBERTOOLSRS_FIRE ( @@ -330,22 +416,26 @@ workflow LRSOMATIC { ) if (!params.skip_normalfiber){ + // Re-merge fiber and non-fiber samples after FIRE annotation fiber_branch.nonFiber .mix(FIBERTOOLSRS_FIRE.out.bam) .set{ch_cat_ubams} - } else { + // Re-merge fiber, non-fiber, and held-out normal samples fiber_branch.nonFiber .mix(normal_bams) .mix(FIBERTOOLSRS_FIRE.out.bam) .set{ch_cat_ubams} - } + // ch_cat_ubams (updated): [meta, 
bam] -- all samples; fiber-seq samples now carry + // nucleosome + FIRE tags in BAM; m6A tags present for PacBio fiber-seq if(!params.skip_qc) { // - // MODULE: FIBERTOOLSRS_QC + // MODULE: FIBERTOOLSRS_QC (label: process_medium) + // Input: FIBERTOOLSRS_FIRE.out.bam -- [meta, bam] -- annotated fiber-seq BAM + // Output: QC reports for fiber-seq signal (written to outdir) // FIBERTOOLSRS_QC ( @@ -354,10 +444,13 @@ workflow LRSOMATIC { } } // - // MODULE: MINIMAP2_ALIGN + // MODULE: MINIMAP2_ALIGN (label: process_high) + // Input: [meta, bam] -- unaligned BAM (may carry m6A/nucleosome/FIRE tags for fiber-seq) + // ch_fasta -- [[:], fasta] + // sort_bam=true, cigar_paf_format='bai', cigar_bam='', split_prefix='' + // Output: .bam -- [meta, bam] -- coordinate-sorted aligned BAM + // .index -- [meta, bai] -- BAM index // - // Aligns ubams - // ch_cat_ubams: [meta, bam] -- may include m6A/nucleosome/FIRE annotations for fiber-seq samples MINIMAP2_ALIGN ( ch_cat_ubams, @@ -369,76 +462,167 @@ workflow LRSOMATIC { ) MINIMAP2_ALIGN.out.bam .set { ch_minimap_bam } - // [meta, bam] -- aligned BAM - - // ch_minimap_bams into tumor and paired to phase the paired ones on normal - // and add index + // ch_minimap_bam: [meta, bam] -- coordinate-sorted aligned BAM + // Join BAM with its index, then branch into paired-sample vs. 
tumor-only paths ch_minimap_bam .join(MINIMAP2_ALIGN.out.index) + .set {ch_index_minimap} + // ch_index_minimap: [meta, bam, bai] -- aligned BAM + index, all samples + + ch_index_minimap .branch { meta, _bams, _bais -> - paired: meta.paired_data - tumor_only: !meta.paired_data + paired: meta.paired_data // meta.paired_data is the normal sample ID for tumors, or the tumor ID for normals + tumor_only: !meta.paired_data // meta.paired_data is null/false for tumor-only samples } .set { branched_minimap } - // branched_minimap.paired: [meta, bam, bai] -- one item per sample (tumor AND normal flow separately) - // branched_minimap.tumor_only: [meta, bam, bai] - // - // SUBWORKFLOW: TUMOR_NORMAL_HAPPHASE - // - // Phasing/haplotaging/small germline variant calling for tumor-normal samples + // branched_minimap.paired: [meta, bam, bai] -- tumor AND normal samples flow together here; + // each item is a single sample, joined downstream + // branched_minimap.tumor_only: [meta, bam, bai] -- tumor-only samples (no matched normal) - TUMOR_NORMAL_HAPPHASE ( - branched_minimap.paired, + // SUBWORKFLOW: TUMORONLY_SMALLVAR + // Input: branched_minimap.tumor_only -- [meta, bam, bai] + // Output: .somatic_vcf -- [meta, vcf, tbi] -- somatic SNVs/indels + // .germline_vcf -- [meta, vcf, tbi] -- germline SNVs/indels (ClairS-TO germline output) + TUMORONLY_SMALLVAR( + branched_minimap.tumor_only, ch_fasta, ch_fai, - downloaded_clair3_models + pon_channel ) - ch_versions = ch_versions.mix(TUMOR_NORMAL_HAPPHASE.out.versions) + branched_minimap.paired + .set{paired_ch} - // - // SUBWORKFLOW: TUMOR_ONLY_HAPPHASE - // - // Phasing/haplotagging for tumor only samples + // Split paired samples into tumor and normal streams for joining + paired_ch + .branch { meta, _bams, _bais -> + normal: meta.type == "normal" + tumor: meta.type == "tumor" + } + .set{branched_paired_ch} + // branched_paired_ch.normal: [meta, bam, bai] -- normal samples (meta.type == "normal") + // branched_paired_ch.tumor: 
[meta, bam, bai] -- tumor samples (meta.type == "tumor") - dbsnp = file(params.dbsnp) - colors = file(params.colors) - onekgenomes = file(params.onekgenomes) - gnomad = file(params.gnomad) + // Strip 'type' field from normal meta before joining, so the key is just sample ID + branched_paired_ch.normal + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, bam, bai] + } + .set{paired_normal_bams} + // paired_normal_bams: [meta (no type), normal_bam, normal_bai] - TUMOR_ONLY_HAPPHASE ( - branched_minimap.tumor_only, + // Join tumor and normal BAMs into a single channel for somatic variant calling + // Join key is meta (with 'type' stripped), so tumor meta.id must equal normal meta.id + branched_paired_ch.tumor + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, bam, bai] + } + .join(paired_normal_bams) + .set { somatic_smallvar_input } + // somatic_smallvar_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + + // SUBWORKFLOW: PAIRED_SMALLVAR_SOMATIC + // Input: somatic_smallvar_input -- [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + // Output: .somatic_vcf -- [meta, vcf, tbi] -- somatic SNVs/indels (ClairS and/or DeepSomatic consensus) + PAIRED_SMALLVAR_SOMATIC ( + somatic_smallvar_input, + ch_fasta, + ch_fai + ) + + // SUBWORKFLOW: PAIRED_SMALLVAR_GERMLINE + // Input: branched_paired_ch.normal -- [meta, bam, bai] -- normal sample BAMs only + // downloaded_clair3_models -- [meta(id=model_name), model_dir] + // Output: .germline_vcf -- [meta, vcf, tbi] -- germline SNVs/indels (Clair3 and/or DeepVariant consensus) + PAIRED_SMALLVAR_GERMLINE ( + branched_paired_ch.normal, ch_fasta, ch_fai, - dbsnp, - colors, - onekgenomes, - gnomad + downloaded_clair3_models ) - 
// Set channel for phased germline variants - germline_vep = TUMOR_NORMAL_HAPPHASE.out.germline_vep.mix(TUMOR_ONLY_HAPPHASE.out.germline_vep) - // [meta, vcf, []] -- germline variants merged from T/N and tumor-only paths + // Merge germline VCFs from paired and tumor-only paths into a single channel + PAIRED_SMALLVAR_GERMLINE.out.germline_vcf + .mix(TUMORONLY_SMALLVAR.out.germline_vcf) + .set{ch_germline_vcf} + // ch_germline_vcf: [meta, vcf, tbi] -- germline variants for all samples (paired + tumor-only) + + // Merge somatic VCFs from tumor-only and paired T/N paths into a single channel + TUMORONLY_SMALLVAR.out.somatic_vcf + .mix(PAIRED_SMALLVAR_SOMATIC.out.somatic_vcf) + .set{ch_somatic_vcf} + // ch_somatic_vcf: [meta, vcf, tbi] -- somatic variants for all samples + + // SUBWORKFLOW: PHASING_HAPLOTYPING + // Input: ch_index_minimap -- [meta, bam, bai] -- all aligned BAMs (tumor + normal + tumor-only) + // ch_germline_vcf -- [meta, vcf, tbi] -- germline variants (used to phase reads) + // ch_somatic_vcf -- [meta, vcf, tbi] -- somatic variants (get phasing transferred) + // ch_fasta / ch_fai + // Output: .phased_germline_vcf -- [meta, vcf, tbi] -- phased germline VCF + // .phased_somatic_vcf -- [meta, vcf, tbi] -- phased somatic VCF + // .tumor_normal_hapbams_ch -- [meta, bam, bai] -- haplotagged BAMs (all samples) + PHASING_HAPLOTYPING ( + ch_index_minimap, + ch_germline_vcf, + ch_somatic_vcf, + ch_fasta, + ch_fai + ) + + // Prepare phased VCFs for VEP: add empty 'extra' list required by ENSEMBLVEP_VEP + PHASING_HAPLOTYPING.out.phased_somatic_vcf + .map { meta, vcf, _tbi -> + def extra = [] + return [meta, vcf, extra] + } + .set { somatic_vep } + // somatic_vep: [meta, vcf, []] -- phased somatic VCF ready for VEP annotation - // Set channel for somatic variants - somatic_vep = TUMOR_NORMAL_HAPPHASE.out.somatic_vep.mix(TUMOR_ONLY_HAPPHASE.out.somatic_vep) - // [meta, vcf, []] -- somatic variants merged from T/N and tumor-only paths + 
PHASING_HAPLOTYPING.out.phased_germline_vcf + .map { meta, vcf, _tbi -> + def extra = [] + return [meta, vcf, extra] + } + .set { germline_vep } + // germline_vep: [meta, vcf, []] -- phased germline VCF ready for VEP annotation whatshap_stats_txt = channel.empty() if (!params.skip_qc && !params.skip_whatshapstats) { - // Create channel for whatshap stats + // Drop the empty 'extra' element added for VEP input germline_vep .map { meta, vcf, _extra -> return [meta, vcf] } .set { ch_whatshap_stats } + // ch_whatshap_stats: [meta, vcf] -- phased germline VCF for phasing QC // - // Module: WHATSHAP_STATS + // MODULE: WHATSHAP_STATS (label: process_single) + // Input: [meta, vcf] -- phased VCF (germline) + // gtf=true, sample=true, chr_lengths=false + // Output: .tsv -- [meta, tsv] -- per-chromosome phasing statistics // WHATSHAP_STATS ( @@ -455,7 +639,11 @@ workflow LRSOMATIC { if (!params.skip_vep) { // - // MODULE: GERMLINE_VEP + // MODULE: GERMLINE_VEP (ENSEMBLVEP_VEP alias; label: process_medium) + // Input: germline_vep -- [meta, vcf, []] -- phased germline VCF + // vep_cache -- [[:], cache_dir] + // ch_fasta -- [[:], fasta] + // Output: annotated germline VCF with consequence predictions // if (params.vep_custom != null) { vep_custom = file(params.vep_custom) @@ -480,7 +668,11 @@ workflow LRSOMATIC { ) // - // MODULE: SOMATIC_VEP + // MODULE: SOMATIC_VEP (ENSEMBLVEP_VEP alias; label: process_medium) + // Input: somatic_vep -- [meta, vcf, []] -- phased somatic VCF + // vep_cache -- [[:], cache_dir] + // ch_fasta -- [[:], fasta] + // Output: annotated somatic VCF with consequence predictions // SOMATIC_VEP ( @@ -496,23 +688,43 @@ workflow LRSOMATIC { ) } - ch_versions = ch_versions.mix(TUMOR_ONLY_HAPPHASE.out.versions) - - // Get Severus input channel - TUMOR_NORMAL_HAPPHASE.out.tumor_normal_severus - .mix(TUMOR_ONLY_HAPPHASE.out.tumor_only_severus) - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf, tbi -> - return [meta, tumor_bam, tumor_bai, 
normal_bam, normal_bai, vcf, tbi] + // Build SEVERUS input by combining tumor-only and T/N paired samples with phased germline VCFs + // Tumor-only samples get empty lists for normal BAM/BAI (SEVERUS runs in tumor-only mode) + branched_minimap.tumor_only + .map{ meta, bam, bai -> + def new_meta = meta.subMap('id', + 'paired_data', + 'platform', + 'sex', + 'fiber', + 'clair3_model', + 'clairS_model', + 'clairSTO_model', + 'kinetics') + return[new_meta, bam, bai] } - .set { severus_reformat } - // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, phased_tbi] -- normal_bam/bai are [] for tumor-only + .map{meta, tumor_bam, tumor_bai-> + def normal_bam = [] + def normal_bai = [] + return [meta, tumor_bam, tumor_bai, normal_bam, normal_bai] + } + // Mix with paired T/N input (which already has normal BAM/BAI from somatic_smallvar_input) + .mix(somatic_smallvar_input) + // Attach phased germline VCF (used by SEVERUS for phased SV calling) + .join(PHASING_HAPLOTYPING.out.phased_germline_vcf) + .set{severus_input} + // severus_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_germline_vcf, phased_germline_tbi] + // normal_bam/bai are empty lists [] for tumor-only samples // - // MODULE: SEVERUS + // MODULE: SEVERUS (label: process_high) + // Input: severus_input -- [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, vcf, tbi] + // [[:], bed_file, pon_file] -- params.bed_file and params.pon_file (NOTE(review): confirm bed_file is the VNTR annotation BED SEVERUS expects) + // Output: .all_vcf -- [meta, vcf] -- all SVs called by SEVERUS (fed to SV_VEP and WAKHAN below) // SEVERUS ( - severus_reformat, + severus_input, [[:], params.bed_file, params.pon_file] ) @@ -524,9 +736,14 @@ workflow LRSOMATIC { return [meta, vcf, extra] } .set { sv_vep } - // [meta, severus_all_vcf, []] -- all SVs for VEP annotation + // sv_vep: [meta, severus_all_vcf, []] -- all SVs ready for VEP annotation if(!params.skip_vep) { + // + // MODULE: SV_VEP (ENSEMBLVEP_VEP alias; label: process_medium) + // Input: sv_vep -- [meta, vcf, []] -- SEVERUS
SV VCF + // Output: annotated SV VCF with consequence predictions + // SV_VEP ( sv_vep, params.vep_genome, @@ -547,7 +764,9 @@ workflow LRSOMATIC { if (!params.skip_qc && !params.skip_cramino) { // - // MODULE: CRAMINO + // MODULE: CRAMINO_POST (label: process_medium) + // Input: ch_minimap_bam -- [meta, bam] -- post-alignment coordinate-sorted BAM + // Output: .arrow -- [meta, arrow_file] -- alignment statistics in feather format // CRAMINO_POST ( ch_minimap_bam ) @@ -555,7 +774,9 @@ workflow LRSOMATIC { if (!params.skip_nanoplot) { // - // Module: Nanoplot + // MODULE: NANOPLOT_POST (label: process_medium) + // Input: CRAMINO_POST.out.arrow -- [meta, arrow_file] + // Output: HTML/txt QC reports (post-alignment) // NANOPLOT_POST(CRAMINO_POST.out.arrow) @@ -574,12 +795,19 @@ workflow LRSOMATIC { if (!params.skip_qc && !params.skip_mosdepth) { - // prepare mosdepth input channel: we need to specify compulsory path to bed as well + // MOSDEPTH requires a BED file argument; pass [] to compute genome-wide depth ch_minimap_bam.join(MINIMAP2_ALIGN.out.index) .map { meta, bam, bai -> [meta, bam, bai, []] } .set { ch_mosdepth_in } - // [meta, bam, bai, []] -- [] is the required empty BED path for MOSDEPTH + // ch_mosdepth_in: [meta, bam, bai, []] -- [] is the optional BED (empty = genome-wide) + // + // MODULE: MOSDEPTH (label: process_medium) + // Input: [meta, bam, bai, bed] -- bed is [] for genome-wide coverage + // ch_fasta -- [[:], fasta] -- used for CRAM decoding (if applicable) + // Output: .global_txt -- [meta, txt] -- global depth summary + // .summary_txt -- [meta, txt] -- per-contig depth summary + // MOSDEPTH ( ch_mosdepth_in, ch_fasta @@ -590,7 +818,12 @@ workflow LRSOMATIC { } // - // SUBWORKFLOW: BAM_STATS_SAMTOOLS + // SUBWORKFLOW: BAM_STATS_SAMTOOLS (nf-core subworkflow) + // Input: [meta, bam, bai] -- aligned BAM with index + // ch_fasta -- [[:], fasta] + // Output: .stats -- [meta, txt] -- samtools stats output + // .flagstat -- [meta, txt] -- samtools 
flagstat output + // .idxstats -- [meta, txt] -- samtools idxstats output // ch_bam_stats = channel.empty() ch_bam_flagstat = channel.empty() @@ -599,7 +832,7 @@ workflow LRSOMATIC { if (!params.skip_qc && !params.skip_bamstats ) { BAM_STATS_SAMTOOLS ( - ch_minimap_bam.join(MINIMAP2_ALIGN.out.index), // Join bam channel with index channel + ch_minimap_bam.join(MINIMAP2_ALIGN.out.index), // [meta, bam, bai] ch_fasta ) @@ -609,16 +842,20 @@ workflow LRSOMATIC { } // - // MODULE: ASCAT + // MODULE: ASCAT (label: process_high) + // Input: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] -- NOTE: normal before tumor (ASCAT convention) + // allele_files, loci_files, gc_file, rt_file -- ASCAT reference files + // Output: .png plots, .segments, .purity_ploidy -- copy number results // if (!params.skip_ascat) { - severus_reformat - .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf -> + // ASCAT expects [normal, tumor] order; rearrange from severus_input [tumor, normal] order + severus_input + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, _vcf, _tbi -> return [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] } .set { ascat_ch } - // [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] -- NOTE: normal before tumor (ASCAT convention) + // ascat_ch: [meta, normal_bam, normal_bai, tumor_bam, tumor_bai] ASCAT ( ascat_ch, @@ -635,16 +872,25 @@ workflow LRSOMATIC { } // - // MODULE: WAKHAN + // MODULE: WAKHAN (label: process_medium) + // Haplotype-specific somatic copy number profiling from long reads + // Input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_germline_vcf, severus_all_vcf] + // ch_fasta -- [[:], fasta] + // centromere_bed -- BED file of centromere coordinates + // Output: WAKHAN copy number results (written to outdir) // if (!params.skip_wakhan) { - // Prepare input channel for WAKHAN - severus_reformat + // Attach SEVERUS SV VCF to the severus_input channel (dropping the phased TBI) +
severus_input .join(SEVERUS.out.all_vcf) + .map { meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, _phased_tbi, all_vcf -> + return [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, all_vcf] + } .set { wakhan_input } - // [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_vcf, phased_tbi, severus_all_vcf] + // wakhan_input: [meta, tumor_bam, tumor_bai, normal_bam, normal_bai, phased_germline_vcf, severus_all_vcf] + // normal_bam/bai are [] for tumor-only samples WAKHAN ( wakhan_input, @@ -654,25 +900,31 @@ workflow LRSOMATIC { } // - // Collate and save software versions + // Collate software versions from two sources: + // 1. ch_versions (classic path): version YAML files emitted by modules + // 2. channel.topic("versions") (topic channel path): version tuples [process, tool, version] + // emitted directly by modules that use the topic-channel pattern // def topic_versions = channel.topic("versions") - .distinct() + .distinct() // deduplicate identical version entries across samples .branch { entry -> - versions_file: entry instanceof Path - versions_tuple: true + versions_file: entry instanceof Path // classic YAML file path + versions_tuple: true // [process, tool, version] tuple } def topic_versions_string = topic_versions.versions_tuple .map { process, tool, version -> + // Strip workflow prefix (everything before the last ':') from process name [ process[process.lastIndexOf(':')+1..-1], " ${tool}: ${version}" ] } - .groupTuple(by:0) + .groupTuple(by:0) // group tool versions by process name .map { process, tool_versions -> tool_versions.unique().sort() "${process}:\n${tool_versions.join('\n')}" } + // topic_versions_string: formatted YAML-like string per process, ready to write + // Merge both version sources and write to versions YAML (consumed by MultiQC) softwareVersionsToYAML(ch_versions.mix(topic_versions.versions_file)) .mix(topic_versions_string) .collectFile( @@ -681,10 +933,14 @@ workflow LRSOMATIC { sort: true, 
newLine: true ).set { ch_collated_versions } + // ch_collated_versions: path -- merged software versions YAML for MultiQC // - // MODULE: MultiQC + // MODULE: MULTIQC (label: process_single) + // Aggregates QC reports from all modules into a single HTML report + // Input: [[id:'multiqc'], [qc_files...], [config_files...], [logo], [], []] + // Output: .report -- [meta, html] -- MultiQC HTML report // summary_params = paramsSummaryMap( workflow, parameters_schema: "nextflow_schema.json") @@ -706,7 +962,8 @@ workflow LRSOMATIC { ) ) - // Collect MultiQC files + // Collect QC outputs from all optional modules + // .collect{it -> it[1]} extracts the file from [meta, file] tuples; ifEmpty([]) handles skipped modules ch_multiqc_files = ch_multiqc_files.mix(ch_bam_stats.collect{it -> it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_bam_flagstat.collect{it -> it[1]}.ifEmpty([])) ch_multiqc_files = ch_multiqc_files.mix(ch_bam_idxstats.collect{it -> it[1]}.ifEmpty([])) @@ -719,6 +976,7 @@ workflow LRSOMATIC { ch_multiqc_files = ch_multiqc_files.mix(whatshap_stats_txt.collect{it -> it[1]}.ifEmpty([])) + // Build the final MULTIQC input tuple: all QC files + config files + logo MULTIQC ( ch_multiqc_files .collect() @@ -728,6 +986,7 @@ workflow LRSOMATIC { multiqc_config_files += [file(params.multiqc_config, checkIfExists: true)] } def multiqc_logo_file = params.multiqc_logo ? [file(params.multiqc_logo, checkIfExists: true)] : [] + // MULTIQC input: [meta, [qc_files], [config_files], [logo], [], []] [[id: 'multiqc'], files, multiqc_config_files, multiqc_logo_file, [], []] } )