From 194c306709ed7ebb22e89bfda4d5e0d540fb8650 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 13 Apr 2018 13:12:40 +0200 Subject: [PATCH 1/9] add container for vcftools --- containers/vcftools/Dockerfile | 15 +++++++++++++++ containers/vcftools/environment.yml | 9 +++++++++ 2 files changed, 24 insertions(+) create mode 100644 containers/vcftools/Dockerfile create mode 100644 containers/vcftools/environment.yml diff --git a/containers/vcftools/Dockerfile b/containers/vcftools/Dockerfile new file mode 100644 index 0000000000..8995effec8 --- /dev/null +++ b/containers/vcftools/Dockerfile @@ -0,0 +1,15 @@ +FROM nfcore/base:latest + +LABEL \ + author="Maxime Garcia" \ + description="vcftools image used in Sarek 2.0" \ + maintainer="maxime.garcia@scilifelab.se" + +COPY environment.yml / + +RUN \ + conda env create -f /environment.yml && \ + conda clean -a + + # Export PATH +ENV PATH /opt/conda/envs/sarek-vcftools-2.0/bin:$PATH diff --git a/containers/vcftools/environment.yml b/containers/vcftools/environment.yml new file mode 100644 index 0000000000..04faa110e1 --- /dev/null +++ b/containers/vcftools/environment.yml @@ -0,0 +1,9 @@ +# You can use this file to create a conda environment: +# conda env create -f environment.yml +name: sarek-vcftools-2.0 +channels: + - defaults + - conda-forge + - bioconda +dependencies: +- vcftools=0.1.15 From 9d64e64bece63fd5c905e691fcea85e983a90b03 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 13 Apr 2018 13:13:06 +0200 Subject: [PATCH 2/9] update buildContainers.nf script --- buildContainers.nf | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/buildContainers.nf b/buildContainers.nf index 5595b5fa3e..0970eef673 100644 --- a/buildContainers.nf +++ b/buildContainers.nf @@ -192,6 +192,7 @@ def defineContainersList(){ 'snpeff', 'snpeffgrch37', 'snpeffgrch38', + 'vcftools', 'vepgrch37', 'vepgrch38' ] @@ -211,8 +212,8 @@ def helpMessage() { log.info " Default: all" log.info " Possible values:" log.info " all, fastqc, freebayes, gatk, igvtools, multiqc, mutect1" - log.info " picard, qualimap, r-base, runallelecount, sarek" - log.info " snpeff, snpeffgrch37, snpeffgrch38, vepgrch37, vepgrch38" + log.info " picard, qualimap, r-base, runallelecount, sarek, snpeff" + log.info " snpeffgrch37, snpeffgrch38, vcftools, vepgrch37, vepgrch38" log.info " --docker: Build containers using Docker" log.info " --help" log.info " you're reading it" From 3ea0ecb92d53d3ef3c5b527ed541ff4ae016fc58 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 13 Apr 2018 13:13:29 +0200 Subject: [PATCH 3/9] add process RunVcftools --- germlineVC.nf | 63 ++++++++++++++++++++++++++++++++++++++++++++++----- somaticVC.nf | 52 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 108 insertions(+), 7 deletions(-) diff --git a/germlineVC.nf b/germlineVC.nf index 1dd668c5d3..afb1611d14 100644 --- a/germlineVC.nf +++ b/germlineVC.nf @@ -399,8 +399,8 @@ process ConcatVCF { file(genomeIndex) from Channel.value(referenceMap.genomeIndex) output: - set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz") into vcfConcatenated - file("*.vcf.gz.tbi") into vcfConcatenatedTbi + set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfConcatenated + when: ( 'haplotypecaller' in tools || 'mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC @@ -453,8 +453,9 @@ process ConcatVCF { if (params.verbose) vcfConcatenated = vcfConcatenated.view { "Variant Calling output:\n\ - Tool : ${it[0]}\tID : ${it[1]}\tSample: [${it[3]}, ${it[2]}]\n\ - File : ${it[4].fileName}" + Tool : ${it[0]}\tID : ${it[1]}\tSample: ${it[2]}\n\ + Files : ${it[4].fileName}\n\ + Index : ${it[5].fileName}" } process RunSingleStrelka { @@ -549,7 +550,11 @@ if (params.verbose) singleMantaOutput = singleMantaOutput.view { Index : ${it[4].fileName}" } -vcfForBCFtools = Channel.empty().mix( +vcfForQC = Channel.empty().mix( + vcfConcatenated.map { + variantcaller, idPatient, idSampleNormal, idSampleTumor, vcf, tbi -> + [variantcaller, vcf] + }, singleStrelkaOutput.map { variantcaller, idPatient, idSample, vcf, tbi -> [variantcaller, vcf[1]] @@ -559,6 +564,8 @@ vcfForBCFtools = Channel.empty().mix( [variantcaller, vcf[2]] }) +(vcfForBCFtools, vcfForVCFtools) = vcfForQC.into(2) + process RunBcftoolsStats { tag {vcf} @@ -585,6 +592,49 @@ if (params.verbose) bcfReport = bcfReport.view { bcfReport.close() +process RunVcftools { + tag {vcf} + + publishDir directoryMap.vcftools, mode: 'link' + + input: + set variantCaller, file(vcf) from vcfForVCFtools + + output: + file ("${vcf.baseName}.*") into vcfReport + + when: !params.noReports + + script: + """ + vcftools \ + --gzvcf ${vcf} \ + --relatedness2 \ + --out ${vcf.baseName} + + vcftools \ + --gzvcf ${vcf} \ + --TsTv-by-count \ + --out ${vcf.baseName} + + vcftools \ + --gzvcf ${vcf} \ + --TsTv-by-qual \ + --out ${vcf.baseName} + + vcftools \ + --gzvcf ${vcf} \ + --FILTER-summary \ + --out ${vcf.baseName} + """ +} + +if (params.verbose) vcfReport = vcfReport.view { + "VCFTools stats report:\n\ + File : [${it.fileName}]" +} + +vcfReport.close() /* ================================================================================ = F U N C T I O N S = @@ -646,10 +696,11 @@ def defineDirectoryMap() { 'bamQC' : "${params.outDir}/Reports/bamQC", 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats", + 'vcftools' : "${params.outDir}/Reports/VCFTools", 'ascat' : "${params.outDir}/VariantCalling/Ascat", 'freebayes' : "${params.outDir}/VariantCalling/FreeBayes", - 'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller", 'gvcf-hc' : "${params.outDir}/VariantCalling/HaplotypeCallerGVCF", + 'haplotypecaller' : "${params.outDir}/VariantCalling/HaplotypeCaller", 'manta' : "${params.outDir}/VariantCalling/Manta", 'mutect1' : "${params.outDir}/VariantCalling/MuTect1", 'mutect2' : "${params.outDir}/VariantCalling/MuTect2", diff --git a/somaticVC.nf b/somaticVC.nf index ac54bc8ef7..ce35f2f9a1 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -777,7 +777,11 @@ if (params.verbose) ascatOutput = ascatOutput.view { (strelkaIndels, strelkaSNVS) = strelkaOutput.into(2) (mantaSomaticSV, mantaDiploidSV) = mantaOutput.into(2) -vcfForBCFtools = Channel.empty().mix( +vcfForQC = Channel.empty().mix( + vcfConcatenated.map { + variantcaller, idPatient, idSampleNormal, idSampleTumor, vcf, tbi -> + [variantcaller, vcf] + }, mantaDiploidSV.map { variantcaller, idPatient, idSampleNormal, idSampleTumor, vcf, tbi -> [variantcaller, vcf[2]] @@ -799,6 +803,8 @@ vcfForBCFtools = Channel.empty().mix( [variantcaller, vcf[1]] }) +(vcfForBCFtools, vcfForVCFtools) = vcfForQC.into(2) + process RunBcftoolsStats { tag {vcf} @@ -825,6 +831,49 @@ if (params.verbose) bcfReport = bcfReport.view { bcfReport.close() +process RunVcftools { + tag {vcf} + + publishDir directoryMap.vcftools, mode: 'link' + + input: + set variantCaller, file(vcf) from vcfForVCFtools + + output: + file ("${vcf.baseName}.*") into vcfReport + + when: !params.noReports + + script: + """ + vcftools \ + --gzvcf ${vcf} \ + --relatedness2 \ + --out ${vcf.baseName} + + vcftools \ + --gzvcf ${vcf} \ + --TsTv-by-count \ + --out ${vcf.baseName} + + vcftools \ + --gzvcf ${vcf} \ + --TsTv-by-qual \ + --out ${vcf.baseName} + + vcftools \ + --gzvcf ${vcf} \ + --FILTER-summary \ + --out ${vcf.baseName} + """ +} + +if (params.verbose) vcfReport = vcfReport.view { + "VCFTools stats report:\n\ + File : [${it.fileName}]" +} + +vcfReport.close() /* ================================================================================ = F U N C T I O N S = @@ -886,6 +935,7 @@ def defineDirectoryMap() { 'bamQC' : "${params.outDir}/Reports/bamQC", 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats", + 'vcftools' : "${params.outDir}/Reports/VCFTools", 'ascat' : "${params.outDir}/VariantCalling/Ascat", 'freebayes' : "${params.outDir}/VariantCalling/FreeBayes", 'manta' : "${params.outDir}/VariantCalling/Manta", From bee5b2108a08558528582be39f1537ed96d1803d Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 13 Apr 2018 13:13:55 +0200 Subject: [PATCH 4/9] add container for RunVcftools process --- configuration/containers.config | 1 + configuration/singularity-path.config | 1 + 2 files changed, 2 insertions(+) diff --git a/configuration/containers.config b/configuration/containers.config index fe67793c64..49651be1f2 100644 --- a/configuration/containers.config +++ b/configuration/containers.config @@ -39,5 +39,6 @@ process { $RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.repository}/snpeffgrch38:${params.tag}" : "${params.repository}/snpeffgrch37:${params.tag}"} $RunStrelka.container = "${params.repository}/sarek:${params.tag}" $RunStrelkaBP.container = "${params.repository}/sarek:${params.tag}" + $RunVcftools.container = "${params.repository}/vcftools:${params.tag}" $RunVEP.container = {params.genome == 'GRCh38' ? "${params.repository}/vepgrch38:${params.tag}" : "${params.repository}/vepgrch37:${params.tag}"} } diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index 0edba851ad..c70cb37623 100644 --- a/configuration/singularity-path.config +++ b/configuration/singularity-path.config @@ -45,5 +45,6 @@ process { $RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"} $RunStrelka.container = "${params.containerPath}/sarek-${params.tag}.img" $RunStrelkaBP.container = "${params.containerPath}/sarek-${params.tag}.img" + $RunVcftools.container = "${params.containerPath}/vcftools-${params.tag}.img" $RunVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"} } From f45b46f6587b6776384f2a792e4944da2870a700 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 13 Apr 2018 13:14:11 +0200 Subject: [PATCH 5/9] add vcftools to MultiQC report --- runMultiQC.nf | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/runMultiQC.nf b/runMultiQC.nf index ebef597279..fe90ce222f 100644 --- a/runMultiQC.nf +++ b/runMultiQC.nf @@ -89,6 +89,7 @@ process GenerateMultiQCconfig { echo "- 'samtools'" >> multiqc_config.yaml echo "- 'qualimap'" >> multiqc_config.yaml echo "- 'bcftools'" >> multiqc_config.yaml + echo "- 'vcftools'" >> multiqc_config.yaml echo "- 'snpeff'" >> multiqc_config.yaml """ } @@ -106,6 +107,7 @@ reportsForMultiQC = Channel.empty() Channel.fromPath("${directoryMap.markDuplicatesQC}/*"), Channel.fromPath("${directoryMap.samtoolsStats}/*"), Channel.fromPath("${directoryMap.snpeffReports}/*"), + Channel.fromPath("${directoryMap.vcftools}/*"), multiQCconfig ).collect() @@ -148,10 +150,11 @@ def defineDirectoryMap() { 'bamQC' : "${params.outDir}/Reports/bamQC", 'bcftoolsStats' : "${params.outDir}/Reports/BCFToolsStats", 'fastQC' : "${params.outDir}/Reports/FastQC", - 'snpeffReports' : "${params.outDir}/Reports/SnpEff", 'markDuplicatesQC' : "${params.outDir}/Reports/MarkDuplicates", 'multiQC' : "${params.outDir}/Reports/MultiQC", - 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats" + 'samtoolsStats' : "${params.outDir}/Reports/SamToolsStats", + 'snpeffReports' : "${params.outDir}/Reports/SnpEff", + 'vcftools' : "${params.outDir}/Reports/VCFTools" ] } From 6b62d0e6ee91ff800c11b77afc1c2a60ea186e23 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 13 Apr 2018 13:39:01 +0200 Subject: [PATCH 6/9] update helping and testing scripts --- scripts/do_all.sh | 8 ++++---- scripts/test.sh | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/do_all.sh b/scripts/do_all.sh index e53acc23c5..4a300b7474 100755 --- a/scripts/do_all.sh +++ b/scripts/do_all.sh @@ -46,7 +46,7 @@ do esac done -if [ $GENOME = smallGRCh37 ] +if [[ $GENOME = smallGRCh37 ]] then $GENOME = GRCh37 fi @@ -55,10 +55,10 @@ function toLower() { echo $1 | tr '[:upper:]' '[:lower:]' } -if [ $TOOL = docker ] && [ GRCh37,GRCh38 =~ $GENOME ] +if [[ $TOOL = docker ]] && [[ GRCh37,GRCh38 =~ $GENOME ]] then - nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff + nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff,vcftools nextflow run buildContainers.nf -profile ${PROFILE} --verbose --docker ${PUSH} --repository ${REPOSITORY} --tag ${TAG} --containers snpeff$(toLower ${GENOME}),vep$(toLower ${GENOME}) else - nextflow run buildContainers.nf -profile ${PROFILE} --verbose --singularity --repository ${REPOSITORY} --tag ${TAG} --containerPath containers/ --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff$(toLower ${GENOME}),vep$(toLower ${GENOME}) + nextflow run buildContainers.nf -profile ${PROFILE} --verbose --singularity --repository ${REPOSITORY} --tag ${TAG} --containerPath containers/ --containers fastqc,freebayes,gatk,igvtools,multiqc,mutect1,picard,qualimap,r-base,runallelecount,sarek,snpeff$(toLower ${GENOME}),vcftools,vep$(toLower ${GENOME}) fi diff --git a/scripts/test.sh b/scripts/test.sh index dd0cb25d5b..8fbfeb3d0b 100755 --- a/scripts/test.sh +++ b/scripts/test.sh @@ -85,7 +85,7 @@ fi if [[ ALL,STEP =~ $TEST ]] then - run_wrapper --germline --sample $SAMPLE + run_wrapper --germline --sampleDir data/tiny/tiny/normal run_wrapper --germline --step realign --noReports run_wrapper --germline --step recalibrate --noReports clean_repo From d04ddd046af818e3beaff8762aabbb5c97b05489 Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 13 Apr 2018 13:52:13 +0200 Subject: [PATCH 7/9] better output for ConcatVCF --- somaticVC.nf | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/somaticVC.nf b/somaticVC.nf index ce35f2f9a1..959519a6ab 100644 --- a/somaticVC.nf +++ b/somaticVC.nf @@ -428,8 +428,7 @@ process ConcatVCF { file(genomeIndex) from Channel.value(referenceMap.genomeIndex) output: - set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz") into vcfConcatenated - file("*.vcf.gz.tbi") into vcfConcatenatedTbi + set variantCaller, idPatient, idSampleNormal, idSampleTumor, file("*.vcf.gz"), file("*.vcf.gz.tbi") into vcfConcatenated when: ('mutect1' in tools || 'mutect2' in tools || 'freebayes' in tools ) && !params.onlyQC From e2ec392daa9763fc4bd3ca295e03829e61d7bb1d Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 13 Apr 2018 13:58:56 +0200 Subject: [PATCH 8/9] update README --- doc/BUILD.md | 1 + doc/CONTAINERS.md | 11 +++++++++++ doc/PROCESS.md | 1 + 3 files changed, 13 insertions(+) diff --git a/doc/BUILD.md b/doc/BUILD.md index b63ee8facb..d5b7a3a12c 100644 --- a/doc/BUILD.md +++ b/doc/BUILD.md @@ -27,6 +27,7 @@ nextflow run . [--docker] [--singularity] [--containerPath ] [--push] [--c - `snpeff` this container serves as a base for `snpeffgrch37` and `snpeffgrch38` - `snpeffgrch37` - `snpeffgrch38` + - `vcftools` - `vepgrch37` - `vepgrch38` diff --git a/doc/CONTAINERS.md b/doc/CONTAINERS.md index 0818073eb9..ae09d00b3c 100644 --- a/doc/CONTAINERS.md +++ b/doc/CONTAINERS.md @@ -9,6 +9,7 @@ For processing + germline variant calling + Reports: - [picard](#picard-) - [qualimap](#qualimap-) - [sarek](#sarek-) + - [vcftools](#vcftools-) For processing + somatic variant calling + Reports: - [fastqc](#fastqc-) @@ -21,6 +22,7 @@ For processing + somatic variant calling + Reports: - [r-base](#r-base-) - [runallelecount](#runallelecount-) - [sarek](#sarek-) + - [vcftools](#vcftools-) For annotation for GRCh37, you will need: - [snpeffgrch37](#snpeffgrch37-) @@ -104,6 +106,12 @@ A container named after the process is made for each process. If a container can - Contain **[snpEff][snpeff-link]** 4.3i - Contain GRCh38.86 +## vcftools [![vcftools-docker status][vcftools-docker-badge]][vcftools-docker-link] + +- Based on `nfcore/base:latest` +- Contain **[vcftools][vcftools-link]** 0.1.15 + + ## vepgrch37 [![vepgrch37-docker status][vepgrch37-docker-badge]][vepgrch37-docker-link] - Based on `willmclaren/ensembl-vep:release_90.6` @@ -169,6 +177,9 @@ A container named after the process is made for each process. If a container can [snpeffgrch38-docker-badge]: https://img.shields.io/docker/automated/maxulysse/snpeffgrch38.svg [snpeffgrch38-docker-link]: https://hub.docker.com/r/maxulysse/snpeffgrch38 [strelka-link]: https://github.com/Illumina/strelka +[vcftools-docker-badge]: https://img.shields.io/docker/automated/maxulysse/vcftools.svg +[vcftools-docker-link]: https://hub.docker.com/r/maxulysse/vcftools +[vcftools-link]: https://vcftools.github.io/index.html [vep-docker-badge]: https://img.shields.io/docker/automated/maxulysse/vep.svg [vep-docker-link]: https://hub.docker.com/r/maxulysse/vep [vep-link]: https://github.com/Ensembl/ensembl-vep diff --git a/doc/PROCESS.md b/doc/PROCESS.md index 2a20c0562d..b37f3da14c 100644 --- a/doc/PROCESS.md +++ b/doc/PROCESS.md @@ -42,6 +42,7 @@ We divide them for the moment into 5 main steps: - RunSamtoolsStats - Run Samtools stats on recalibrated BAM files - RunBamQC - Run qualimap BamQC on recalibrated BAM files - RunBcftoolsStats - Run BCFTools stats on vcf files +- RunVcftools - Run VCFTools on vcf files ## Annotation: From b30b16630243dba27c239a5d1e3ffedf72bf66ea Mon Sep 17 00:00:00 2001 From: Maxime Garcia Date: Fri, 13 Apr 2018 14:04:16 +0200 Subject: [PATCH 9/9] aligning = --- configuration/singularity-path.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configuration/singularity-path.config b/configuration/singularity-path.config index c70cb37623..bbbb6b6fc8 100644 --- a/configuration/singularity-path.config +++ b/configuration/singularity-path.config @@ -45,6 +45,6 @@ process { $RunSnpeff.container = {params.genome == 'GRCh38' ? "${params.containerPath}/snpeffgrch38-${params.tag}.img" : "${params.containerPath}/snpeffgrch37-${params.tag}.img"} $RunStrelka.container = "${params.containerPath}/sarek-${params.tag}.img" $RunStrelkaBP.container = "${params.containerPath}/sarek-${params.tag}.img" - $RunVcftools.container = "${params.containerPath}/vcftools-${params.tag}.img" + $RunVcftools.container = "${params.containerPath}/vcftools-${params.tag}.img" $RunVEP.container = {params.genome == 'GRCh38' ? "${params.containerPath}/vepgrch38-${params.tag}.img" : "${params.containerPath}/vepgrch37-${params.tag}.img"} }