-
Notifications
You must be signed in to change notification settings - Fork 149
/
run_vep.nf
105 lines (93 loc) · 3.36 KB
/
run_vep.nf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
/*
* Workflow to run VEP on chromosome based VCF files
*
* This workflow relies on Nextflow (see https://www.nextflow.io/tags/workflow.html)
*
*/
nextflow.enable.dsl=2
// Default parameter values.
// Values given on the command line or in a config file take precedence over
// these script-level defaults.
params.help = false
params.cpus = 1
params.outdir = "outdir"
params.singularity_dir = ""
params.vep_config = ""
params.chros = ""
params.chros_file = ""
// Read by the input validation in this script; declared here so those reads
// do not depend on undefined-parameter lookups.
params.vcf = null
params.skip_check = false
// module imports
include { splitVCF } from '../nf_modules/split_VCF.nf'
include { mergeVCF } from '../nf_modules/merge_VCF.nf'
include { chrosVEP } from '../nf_modules/run_vep.nf'
include { readChrVCF } from '../nf_modules/read_VCF.nf'
// Print the usage message and stop when --help is given.
// The exit status stays at 1, as before.
if (params.help) {
    log.info ''
    log.info 'Pipeline to run VEP chromosome-wise'
    log.info '-------------------------------------------------------'
    log.info ''
    log.info 'Usage: '
    // --vep_config is mandatory and takes a file name, so the example shows a value for it
    log.info ' nextflow -C nf_config/nextflow.config run workflows/run_vep.nf --vcf <path-to-vcf> --chros 1,2 --vep_config <path-to-vep-config>'
    log.info ''
    log.info 'Options:'
    log.info ' --vcf VCF VCF that will be split. Currently supports sorted and bgzipped file'
    log.info ' --outdir DIRNAME Name of output dir. Default: outdir'
    log.info ' --vep_config FILENAME VEP config file. Default: nf_config/vep.ini'
    log.info ' --chros LIST_OF_CHROS Comma-separated list of chromosomes to generate. i.e. 1,2,... Default: 1,2,...,X,Y,MT'
    log.info ' --chros_file LIST_OF_CHROS_FILE Path to file containing list of chromosomes'
    log.info ' --cpus INT Number of CPUs to use. Default 1.'
    log.info ' --output_prefix FILENAME_PREFIX Output filename prefix. The generated output file will have name <output_prefix>.vcf.gz'
    exit 1
}
// Input validation
// A VCF path is mandatory and must point to an existing file.
if (!params.vcf) {
    exit 1, "Undefined --vcf parameter. Please provide the path to a VCF file"
}
vcfFile = file(params.vcf)
if (!vcfFile.exists()) {
    exit 1, "The specified VCF file does not exist: ${params.vcf}"
}

// A non-zero exit status from `bgzip -t` is reported as "not bgzipped".
// The command is given as a list so that a path containing whitespace is
// passed as a single argument, and the process output is drained so the
// child cannot block on a full pipe.
check_bgzipped = ['bgzip', '-t', params.vcf.toString()].execute()
check_bgzipped.waitForProcessOutput()
if (check_bgzipped.exitValue() != 0) {
    exit 1, "The specified VCF file is not bgzipped: ${params.vcf}"
}

// Unless --skip_check is set, run tabix from the VEP image on the VCF
// (-f overwrites an existing index) and abort on any reported problem.
if (!params.skip_check) {
    def sout = new StringBuilder()
    def serr = new StringBuilder()
    check_parsing = ["${params.singularity_dir}/vep.sif", 'tabix', '-p', 'vcf', '-f', params.vcf.toString()].execute()
    // waitForProcessOutput joins the stream-reader threads before returning.
    // consumeProcessOutput followed by waitFor() does not, so stderr could
    // still be empty when it is inspected.
    check_parsing.waitForProcessOutput(sout, serr)
    if (check_parsing.exitValue() != 0 || serr.length() > 0) {
        exit 1, "The specified VCF file has issues in parsing: $serr"
    }
}

// Path of the tabix index that accompanies the input VCF
vcf_index = "${params.vcf}.tbi"

// A VEP config file is mandatory and must exist.
if (!params.vep_config) {
    exit 1, "Undefined --vep_config parameter. Please provide a VEP config file"
}
else {
    vepFile = file(params.vep_config)
    if (!vepFile.exists()) {
        exit 1, "The specified VEP config does not exist: ${params.vep_config}"
    }
}
log.info 'Starting workflow.....'

/*
 * Main workflow: build a channel of chromosome names, split the input VCF
 * per chromosome, run VEP on each piece and merge the annotated results.
 *
 * Chromosome names come from, in order of preference:
 *   1. --chros       comma-separated list
 *   2. --chros_file  file with one name per line
 *   3. the input VCF itself (via readChrVCF)
 */
workflow {
    // Interpolate into a string: a purely numeric value such as `--chros 1`
    // is not a String, and log.info expects a String message.
    log.info "${params.chros}"

    if (params.chros) {
        log.info 'Reading chromosome names from list'
        chr_str = params.chros.toString()
        // Trim each entry and drop empty ones so input like "1, 2" or "1,,2" still works
        chr = Channel.of(chr_str.split(',')).map { it -> it.trim() }.filter { it }
    }
    else if (params.chros_file) {
        log.info 'Reading chromosome names from file'
        // Blank lines in the file would otherwise become empty chromosome names
        chr = Channel.fromPath(params.chros_file).splitText().map { it -> it.trim() }.filter { it }
    }
    else {
        log.info 'Computing chromosome names from input'
        readChrVCF(params.vcf, vcf_index)
        chr = readChrVCF.out.splitText().map { it -> it.trim() }.filter { it }
    }

    splitVCF(chr, params.vcf, vcf_index, params.bin_size)
    // transpose() emits one item per split file, so VEP runs once per piece
    chrosVEP(splitVCF.out.files.transpose(), params.vep_config)
    // Gather every annotated VCF and its index into one merge call
    mergeVCF(chrosVEP.out.vcfFile.collect(), chrosVEP.out.indexFile.collect())
}