Skip to content

Commit

Permalink
Merge pull request #1376 from nakib103/nextflow_split_opt
Browse files Browse the repository at this point in the history
  • Loading branch information
nuno-agostinho committed Mar 27, 2023
2 parents 1c9cef7 + 6998e7e commit 2e26b23
Show file tree
Hide file tree
Showing 8 changed files with 78 additions and 56 deletions.
2 changes: 2 additions & 0 deletions nextflow/README.md
Expand Up @@ -75,6 +75,8 @@ The following config files are used and can be modified depending on user requir
--chros_file LIST_OF_CHROS_FILE Path to file containing list of chromosomes
--cpus INT Number of CPUs to use. Default 1.
--output_prefix FILENAME_PREFIX Output filename prefix. The generated output file will have name <output_prefix>.vcf.gz
--skip_check [0,1] If set, skips the tabix indexing check of the input VCF. Only use this if an index file already exists. It enables the first module to load from cache if -resume is used. Default: 0
--bin_size SIZE If given, each per-chromosome file is further split into files containing at most SIZE variants. Enables faster runs at the expense of more jobs. By default the input file is only split by chromosome (SIZE=0). Default: 0
```
NB: File paths are expected to be absolute paths.
Expand Down
1 change: 1 addition & 0 deletions nextflow/nf_config/nextflow.config
Expand Up @@ -38,3 +38,4 @@ profiles {
//params.chros_file = "$PWD/examples/clinvar-testset/chros.txt"
params.vep_config = "$PWD/nf_config/vep.ini"
params.output_prefix = ""
params.bin_size = 0
Expand Up @@ -32,6 +32,8 @@ process mergeVCF {

cpus params.cpus
container "${params.singularity_dir}/bcftools.sif"

cache 'lenient'


input:
Expand All @@ -43,8 +45,9 @@ process mergeVCF {

script:
"""
bcftools concat ${ vcfFiles } -Oz -o temp-${ mergedVCF}.vcf.gz
bcftools sort -Oz temp-${ mergedVCF}.vcf.gz -o ${ mergedVCF}.vcf.gz
bcftools index -t ${ mergedVCF}.vcf.gz
mkdir -p temp
bcftools concat --no-version -a ${ vcfFiles } -Oz -o temp-${ mergedVCF}.vcf.gz
bcftools sort -T temp -Oz temp-${ mergedVCF}.vcf.gz -o ${ mergedVCF}.vcf.gz
bcftools index -t ${ mergedVCF}.vcf.gz
"""
}
File renamed without changes.
Expand Up @@ -45,7 +45,7 @@ process chrosVEP {
}
else {
"""
vep -i ${vcfFile} -o ${prefix}-${vcfFile} --vcf --compress_output bgzip --format vcf --config ${vep_config}
vep -i ${vcfFile} -o ${prefix}-${vcfFile} --vcf --compress_output bgzip --format vcf --config ${vep_config}
tabix -p vcf ${prefix}-${vcfFile}
"""
}
Expand Down
54 changes: 54 additions & 0 deletions nextflow/nf_modules/split_VCF.nf
@@ -0,0 +1,54 @@
#!/usr/bin/env nextflow

/*
* Script to split a multi-chromosome VCF into single-chromosome VCFs
*/

nextflow.enable.dsl=2

// defaults
// Filename prefix for every file splitVCF writes (<prefix>.<chr>[.<chunk>].vcf.gz)
prefix = "out"
// NOTE(review): not referenced anywhere in this module's visible code — confirm it is still needed
params.outdir = ""
// Number of CPUs requested by the splitVCF process
params.cpus = 1

process splitVCF {
  /*
  Split a multi-chromosome VCF into one VCF per chromosome and, optionally,
  split each chromosome further into fixed-size bins of variants.

  Inputs
  ------
  chr       : name of the chromosome/region to extract (passed to `bcftools view -r`)
  vcf       : bgzipped input VCF
  vcf_index : tabix index of the input VCF (staged next to it so `-r` can be used)
  bin_size  : maximum number of variants per output file; 0 (the default)
              disables binning and leaves one file per chromosome

  Returns
  -------
  A tuple (emitted as `files`) holding:
  1) the bgzipped VCF file(s) written for this chromosome
     - bin_size == 0 : <prefix>.<chr>.vcf.gz
     - bin_size  > 0 : <prefix>.<chr>.<chunk>.vcf.gz, one per bin
  2) the tabix index (.tbi) of each of those VCF files
  */
  cpus params.cpus
  container "${params.singularity_dir}/bcftools.sif"

  input:
  val(chr)
  path(vcf)
  path(vcf_index)
  val(bin_size)

  output:
  tuple path("${prefix}.${chr}.*vcf.gz"), path("${prefix}.${chr}.*vcf.gz.tbi"), emit: files

  // The binning branch must be guarded by a numeric comparison: a plain
  // `[[ 0 ]]` is a non-empty-string test and is always true in bash, which
  // would run `split -l 0` (an error) when binning is disabled.
  script:
  """
  bcftools view --no-version -r ${chr} ${vcf} -o ${prefix}.${chr}.vcf.gz -O z
  bcftools index -t ${prefix}.${chr}.vcf.gz
  if [[ "${bin_size}" -gt 0 ]]; then
    bcftools query -f'%CHROM\t%POS\n' ${prefix}.${chr}.vcf.gz | split -l ${bin_size}
    for file in x*; do
      bcftools view --no-version -T \${file} -Oz ${prefix}.${chr}.vcf.gz > ${prefix}.${chr}.\${file}.vcf.gz
      bcftools index -t ${prefix}.${chr}.\${file}.vcf.gz
    done
    rm ${prefix}.${chr}.vcf.gz
    rm ${prefix}.${chr}.vcf.gz.tbi
    rm x*
  fi
  """
}
40 changes: 0 additions & 40 deletions nextflow/nf_modules/split_into_chros.nf

This file was deleted.

26 changes: 14 additions & 12 deletions nextflow/workflows/run_vep.nf
Expand Up @@ -17,10 +17,10 @@ params.chros=""
params.chros_file=""

// module imports
include { splitVCF } from '../nf_modules/split_into_chros.nf'
include { mergeVCF } from '../nf_modules/merge_chros_VCF.nf'
include { chrosVEP } from '../nf_modules/run_vep_chros.nf'
include { readChrVCF } from '../nf_modules/read_chros_VCF.nf'
include { splitVCF } from '../nf_modules/split_VCF.nf'
include { mergeVCF } from '../nf_modules/merge_VCF.nf'
include { chrosVEP } from '../nf_modules/run_vep.nf'
include { readChrVCF } from '../nf_modules/read_VCF.nf'

// print usage
if (params.help) {
Expand Down Expand Up @@ -59,12 +59,14 @@ if(check_bgzipped.exitValue()){
exit 1, "The specified VCF file is not bgzipped: ${params.vcf}"
}

def sout = new StringBuilder(), serr = new StringBuilder()
check_parsing = "$params.singularity_dir/vep.sif tabix -p vcf -f $params.vcf".execute()
check_parsing.consumeProcessOutput(sout, serr)
check_parsing.waitFor()
if( serr ){
exit 1, "The specified VCF file has issues in parsing: $serr"
if ( !params.skip_check ){
def sout = new StringBuilder(), serr = new StringBuilder()
check_parsing = "$params.singularity_dir/vep.sif tabix -p vcf -f $params.vcf".execute()
check_parsing.consumeProcessOutput(sout, serr)
check_parsing.waitFor()
if( serr ){
exit 1, "The specified VCF file has issues in parsing: $serr"
}
}
vcf_index = "${params.vcf}.tbi"

Expand Down Expand Up @@ -97,7 +99,7 @@ log.info params.chros
readChrVCF(params.vcf, vcf_index)
chr = readChrVCF.out.splitText().map{it -> it.trim()}
}
splitVCF(chr, params.vcf, vcf_index)
chrosVEP(splitVCF.out, params.vep_config)
splitVCF(chr, params.vcf, vcf_index, params.bin_size)
chrosVEP(splitVCF.out.files.transpose(), params.vep_config)
mergeVCF(chrosVEP.out.vcfFile.collect(), chrosVEP.out.indexFile.collect())
}

0 comments on commit 2e26b23

Please sign in to comment.