-
Notifications
You must be signed in to change notification settings - Fork 149
/
split_VCF.nf
54 lines (43 loc) · 1.26 KB
/
split_VCF.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env nextflow
/*
* Script to split a multi-chromosome VCF into single-chromosome VCFs
*/
// Enable the DSL2 syntax for this script.
nextflow.enable.dsl=2
// defaults
// Filename prefix used by the splitVCF process for every file it writes
// ("<prefix>.<chr>...vcf.gz").
// NOTE(review): assigned without `def`; presumably this is what keeps it
// visible inside the process body -- confirm before changing the declaration.
prefix = "out"
// Output directory parameter. Not referenced in the visible part of this
// file; presumably consumed by a calling workflow -- TODO confirm.
params.outdir = ""
// Value passed to the `cpus` directive of the splitVCF process.
params.cpus = 1
process splitVCF {
    /*
    Process to split a multi-chromosome VCF into a single-chromosome VCF,
    optionally cut further into bins holding a fixed number of records.

    Inputs
    ------
    chr       : name of the chromosome/region to extract (passed to `bcftools view -r`)
    vcf       : bgzipped multi-chromosome VCF
    vcf_index : index of `vcf` (staged next to it so that `-r` can be used)
    bin_size  : number of records per bin; a null, empty, 0 or false value
                disables binning

    Returns
    -------
    Returns 2 files per chromosome (or per bin when `bin_size` is set):
        1) A VCF format file for each split chromosome (or bin), named
           ${prefix}.${chr}.vcf.gz or ${prefix}.${chr}.<xaa|xab|...>.vcf.gz
        2) A tabix index for that VCF
    If binning is requested but the chromosome holds no records, the unbinned
    (header-only) chromosome VCF and its index are returned instead.
    */
    cpus params.cpus
    container "${params.singularity_dir}/bcftools.sif"

    input:
    val(chr)
    path(vcf)
    path(vcf_index)
    val(bin_size)

    output:
    tuple path("${prefix}.${chr}.*vcf.gz"), path("${prefix}.${chr}.*vcf.gz.tbi"), emit: files

    script:
    // Decide here whether binning is requested: interpolating bin_size straight
    // into an unquoted `[[ ... ]]` breaks for an empty value (bash syntax error)
    // and is always true for values such as null, false or 0.
    def lines_per_bin = bin_size ? bin_size.toString().trim() : ''
    def chrom_vcf = "${prefix}.${chr}.vcf.gz"
    """
    # Make a failing bcftools inside a pipeline fail the task
    set -o pipefail

    bcftools view --no-version -r ${chr} ${vcf} -o ${chrom_vcf} -O z
    bcftools index -t ${chrom_vcf}

    if [[ -n "${lines_per_bin}" ]]; then
        # Position lists go to a private directory so that the chunk glob can
        # never match (and later delete) a staged input file starting with x
        mkdir -p split_bins
        bcftools query -f'%CHROM\\t%POS\\n' ${chrom_vcf} | split -l ${lines_per_bin} - split_bins/x

        n_bins=0
        for bin_list in split_bins/x*; do
            # No records on this chromosome: the glob stays unexpanded
            [[ -e "\${bin_list}" ]] || continue
            bin_name=\$(basename "\${bin_list}")
            bcftools view --no-version -T "\${bin_list}" -Oz ${chrom_vcf} > ${prefix}.${chr}.\${bin_name}.vcf.gz
            bcftools index -t ${prefix}.${chr}.\${bin_name}.vcf.gz
            n_bins=\$((n_bins + 1))
        done

        # Drop the unbinned VCF only when bins replace it, so the declared
        # outputs always exist
        if [[ \${n_bins} -gt 0 ]]; then
            rm ${chrom_vcf}
            rm ${chrom_vcf}.tbi
        fi
        rm -rf split_bins
    fi
    """
}