Skip to content
This repository has been archived by the owner on Oct 23, 2023. It is now read-only.

Commit

Permalink
added multi-caller somatic variant detection to tumor-normal pipelines
Browse files Browse the repository at this point in the history
  • Loading branch information
jlac committed Jun 4, 2019
1 parent dbf4d75 commit 0cb0262
Show file tree
Hide file tree
Showing 11 changed files with 82 additions and 28 deletions.
38 changes: 33 additions & 5 deletions Rules/all-exomeseq-somatic.rl
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@ if config['project']['annotation'] == "hg19":

rule all_exomeseq_somatic:
input: expand("{s}"+".recal.bam",s=samples),
# expand("{s}"+".g.vcf",s=samples),
expand(config['project']['workpath']+"/mutect_out/{p}"+".FINAL.vcf",p=pairs),
expand(config['project']['workpath']+"/mutect2_out/{p}"+".FINALmutect2.vcf",p=pairs),
expand(config['project']['workpath']+"/strelka_out/{p}"+"_FINAL.vcf",p=pairs),
Expand All @@ -13,7 +12,6 @@ if config['project']['annotation'] == "hg19":
config['project']['workpath']+"/mutect_out/mutect_maf_summary.pdf",
config['project']['workpath']+"/strelka_out/strelka_maf_summary.pdf",
expand(config['project']['workpath']+"/conpair_out/{p}.conpair", p=pairs),
"sample_network.bmp",
config['project']['workpath']+"/mutect2_out/mutect2_variants.database",
config['project']['workpath']+"/mutect_out/mutect_variants.database",
config['project']['workpath']+"/strelka_out/strelka_variants.database",
Expand All @@ -29,6 +27,17 @@ if config['project']['annotation'] == "hg19":
"QC/decoy",
expand("sequenza_out/{p}"+"_alternative_solutions.txt",p=pairs),
expand("freec_out/pass2/{p}"+".recal.bam_CNVs.p.value.txt",p=pairs),
"sample_network_mqc.png",
expand(config['project']['workpath']+"/vardict_out/{p}"+".FINAL.vcf",p=pairs),
expand(config['project']['workpath']+"/vardict_out/oncotator_out/{p}"+".maf",p=pairs),
config['project']['workpath']+"/vardict_out/oncotator_out/final_filtered.maf",
config['project']['workpath']+"/merged_somatic_variants/oncotator_out/final_filtered.maf",
config['project']['workpath']+"/vardict_out/mutsigCV_out/somatic.sig_genes.txt",
config['project']['workpath']+"/merged_somatic_variants/mutsigCV_out/somatic.sig_genes.txt",
expand(config['project']['workpath']+"/merged_somatic_variants/{p}"+".merged.vcf",p=pairs),
expand(config['project']['workpath']+"/merged_somatic_variants/oncotator_out/{p}"+".maf",p=pairs),
config['project']['workpath']+"/vardict_out/vardict_maf_summary.pdf",
config['project']['workpath']+"/merged_somatic_variants/merged_maf_summary.pdf",
output:
params: rname="final"
shell: """
Expand All @@ -39,7 +48,6 @@ elif config['project']['annotation'] == "hg38":

rule all_exomeseq_somatic:
input: expand("{s}"+".recal.bam",s=samples),
# expand("{s}"+".g.vcf",s=samples),
expand(config['project']['workpath']+"/mutect_out/{p}"+".FINAL.vcf",p=pairs),
expand(config['project']['workpath']+"/mutect2_out/{p}"+".FINALmutect2.vcf",p=pairs),
expand(config['project']['workpath']+"/strelka_out/{p}"+"_FINAL.vcf",p=pairs),
Expand All @@ -50,7 +58,6 @@ elif config['project']['annotation'] == "hg38":
config['project']['workpath']+"/mutect_out/mutect_maf_summary.pdf",
config['project']['workpath']+"/strelka_out/strelka_maf_summary.pdf",
expand(config['project']['workpath']+"/conpair_out/{p}.conpair", p=pairs),
"sample_network.bmp",
config['project']['workpath']+"/mutect2_out/mutect2_variants.database",
config['project']['workpath']+"/mutect_out/mutect_variants.database",
config['project']['workpath']+"/strelka_out/strelka_variants.database",
Expand All @@ -66,6 +73,17 @@ elif config['project']['annotation'] == "hg38":
"QC/decoy",
expand("sequenza_out/{p}"+"_alternative_solutions.txt",p=pairs),
expand("freec_out/pass2/{p}"+".recal.bam_CNVs.p.value.txt",p=pairs),
"sample_network_mqc.png",
expand(config['project']['workpath']+"/vardict_out/{p}"+".FINAL.vcf",p=pairs),
expand(config['project']['workpath']+"/vardict_out/oncotator_out/{p}"+".maf",p=pairs),
config['project']['workpath']+"/vardict_out/oncotator_out/final_filtered.maf",
config['project']['workpath']+"/merged_somatic_variants/oncotator_out/final_filtered.maf",
config['project']['workpath']+"/vardict_out/mutsigCV_out/somatic.sig_genes.txt",
config['project']['workpath']+"/merged_somatic_variants/mutsigCV_out/somatic.sig_genes.txt",
expand(config['project']['workpath']+"/merged_somatic_variants/{p}"+".merged.vcf",p=pairs),
expand(config['project']['workpath']+"/merged_somatic_variants/oncotator_out/{p}"+".maf",p=pairs),
config['project']['workpath']+"/vardict_out/vardict_maf_summary.pdf",
config['project']['workpath']+"/merged_somatic_variants/merged_maf_summary.pdf",
output:
params: rname="final"
shell: """
Expand All @@ -87,7 +105,6 @@ elif config['project']['annotation'] == "mm10":
config['project']['workpath']+"/mutect2_out/mutect2_variants.database",
config['project']['workpath']+"/mutect_out/mutect_variants.database",
config['project']['workpath']+"/strelka_out/strelka_variants.database",
"sample_network.bmp",
config['project']['workpath']+"/mutect2_out/oncotator_out/final_filtered.maf",
config['project']['workpath']+"/mutect2_out/mutsigCV_out/somatic.sig_genes.txt",
config['project']['workpath']+"/strelka_out/oncotator_out/final_filtered.maf",
Expand All @@ -98,6 +115,17 @@ elif config['project']['annotation'] == "mm10":
expand("manta_out/{p}/results/variants/candidateSV.vcf.gz", p=pairs),
"admixture_out/admixture_table.tsv",
# The comma after "QC/decoy" is required: without it Python concatenates the
# adjacent string literals into the single (nonexistent) target
# "QC/decoysample_network_mqc.png", dropping both intended inputs.
"QC/decoy",
"sample_network_mqc.png",
expand(config['project']['workpath']+"/vardict_out/{p}"+".FINAL.vcf",p=pairs),
expand(config['project']['workpath']+"/vardict_out/oncotator_out/{p}"+".maf",p=pairs),
config['project']['workpath']+"/vardict_out/oncotator_out/final_filtered.maf",
config['project']['workpath']+"/merged_somatic_variants/oncotator_out/final_filtered.maf",
config['project']['workpath']+"/vardict_out/mutsigCV_out/somatic.sig_genes.txt",
config['project']['workpath']+"/merged_somatic_variants/mutsigCV_out/somatic.sig_genes.txt",
expand(config['project']['workpath']+"/merged_somatic_variants/{p}"+".merged.vcf",p=pairs),
expand(config['project']['workpath']+"/merged_somatic_variants/oncotator_out/{p}"+".maf",p=pairs),
config['project']['workpath']+"/vardict_out/vardict_maf_summary.pdf",
config['project']['workpath']+"/merged_somatic_variants/merged_maf_summary.pdf",
output:
params: rname="final"
shell: """
Expand Down
9 changes: 9 additions & 0 deletions Rules/maftools.rl
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,15 @@ rule maftools_strelka:
params: dir=config['project']['workpath'],rname="pl:maftools"
shell: "cat strelka_out/oncotator_out/*.maf > strelka_out/oncotator_out/strelka_variants.maf; perl Scripts/prep_mafs.pl strelka_out/oncotator_out/strelka_variants.maf strelka_out; module load R/3.5; Rscript Scripts/maftools.R {params.dir}/strelka_out/oncotator_out/ final_filtered.maf {params.dir}/strelka_out/strelka_maf_summary.pdf {output.oncoprint}"

# Summarise the VarDict somatic calls with maftools.
# Concatenates the MAF files found in vardict_out/oncotator_out, runs
# Scripts/prep_mafs.pl on the combined file, then runs Scripts/maftools.R
# to write the summary and oncoplot PDFs.
rule maftools_vardict:
# One MAF per entry in `pairs`, located under <workpath>/vardict_out/oncotator_out.
input: expand(config['project']['workpath']+"/vardict_out/oncotator_out/{x}.maf",x=pairs),
# pre: concatenation of the MAFs; declared temp().
output: pre=temp(config['project']['workpath']+"/vardict_out/oncotator_out/vardict_variants.maf"),
# fin: presumably written by Scripts/prep_mafs.pl (the shell command never names it as a
# destination, only passes the bare file name to maftools.R) -- TODO confirm against the script.
fin=config['project']['workpath']+"/vardict_out/oncotator_out/final_filtered.maf",
# summary / oncoprint: PDF paths handed to Scripts/maftools.R as its 3rd and 4th arguments.
summary=config['project']['workpath']+"/vardict_out/vardict_maf_summary.pdf",
oncoprint=config['project']['workpath']+"/vardict_out/vardict_oncoplot.pdf",
# dir: project work path used to build the absolute paths given to maftools.R.
# rname: not referenced in the shell command; NOTE(review): presumably a job label used by the
# pipeline's cluster submission -- confirm.
params: dir=config['project']['workpath'],rname="pl:maftools"
# NOTE(review): the `cat` and `perl` steps use paths relative to the current directory while the
# declared outputs are rooted at config['project']['workpath']; this assumes the job runs from
# the work path -- confirm. The `*.maf` glob picks up every .maf file present in the directory,
# not only the declared inputs.
shell: "cat vardict_out/oncotator_out/*.maf > vardict_out/oncotator_out/vardict_variants.maf; perl Scripts/prep_mafs.pl vardict_out/oncotator_out/vardict_variants.maf vardict_out; module load R/3.5; Rscript Scripts/maftools.R {params.dir}/vardict_out/oncotator_out/ final_filtered.maf {params.dir}/vardict_out/vardict_maf_summary.pdf {output.oncoprint}"

rule maftools_merged:
input: expand(config['project']['workpath']+"/merged_somatic_variants/oncotator_out/{x}.maf",x=pairs),
output: pre=temp(config['project']['workpath']+"/merged_somatic_variants/oncotator_out/merged_variants.maf"),
Expand Down
7 changes: 5 additions & 2 deletions Rules/merge_somatic_tumoronly_vcfs.rl
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,8 @@ rule merge_somatic_tumoronly_vcfs:
mutect2=config['project']['workpath']+"/mutect2_out/{x}.FINALmutect2.vcf",
vardict=config['project']['workpath']+"/vardict_out/{x}.FINAL.vcf",
output: mergedvcf=config['project']['workpath']+"/merged_somatic_variants/{x}.merged.vcf",
params: gres="lscratch:100",gatk=config['bin'][pfamily]['GATK'],genome=config['references'][pfamily]['GENOME'],snpsites=config['references'][pfamily]['SNPSITES'],rname="CombineVariants"
shell: "mkdir -p merged_somatic_variants; module load GATK/3.8-0; java -Xmx48g -Djava.io.tmpdir=/lscratch/$SLURM_JOBID -jar $GATK_JAR -T CombineVariants -R {params.genome} -nt 8 --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED --genotypemergeoption PRIORITIZE --rod_priority_list mutect2,vardict,mutect --minimumN 2 -o {output.mergedvcf} --variant:mutect {input.mutect} --variant:mutect2 {input.mutect2} --variant:vardict {input.vardict}"
csvstats=config['project']['workpath']+"/merged_somatic_variants/{x}.mutect.stats.csv",
htmlstats=config['project']['workpath']+"/merged_somatic_variants/{x}.mutect.stats.html",
out=config['project']['workpath']+"/merged_somatic_variants/{x}.snpeff.out"
params: gres="lscratch:100",gatk=config['bin'][pfamily]['GATK'],genome=config['references'][pfamily]['GENOME'],snpsites=config['references'][pfamily]['SNPSITES'],snpeffgenome=config['references'][pfamily]['SNPEFF_GENOME'],snpeff=config['bin'][pfamily]['SNPEFF'],effconfig=config['references'][pfamily]['SNPEFF_CONFIG'],rname="CombineVariants"
shell: "mkdir -p merged_somatic_variants; module load GATK/3.8-0; java -Xmx48g -Djava.io.tmpdir=/lscratch/$SLURM_JOBID -jar $GATK_JAR -T CombineVariants -R {params.genome} -nt 8 --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED --genotypemergeoption PRIORITIZE --rod_priority_list mutect2,vardict,mutect --minimumN 2 -o {output.mergedvcf} --variant:mutect {input.mutect} --variant:mutect2 {input.mutect2} --variant:vardict {input.vardict}; module load snpEff/4.3t; java -Xmx12g -jar $SNPEFF_JAR -v {params.snpeffgenome} -c {params.effconfig} -cancer -canon -csvStats {output.csvstats} -stats {output.htmlstats} -cancerSamples pairs {output.mergedvcf} > {output.out}"
10 changes: 7 additions & 3 deletions Rules/merge_somatic_vcfs.rl
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ rule merge_somatic_vcfs:
input: mutect=config['project']['workpath']+"/mutect_out/{x}.FINAL.vcf",
strelka=config['project']['workpath']+"/strelka_out/{x}_FINAL.vcf",
mutect2=config['project']['workpath']+"/mutect2_out/{x}.FINALmutect2.vcf",
output: mergedvcf=config['project']['workpath']+"/merged_somatic_variants/{x}_merged.vcf",
params: gres="lscratch:100",gatk=config['bin'][pfamily]['GATK'],genome=config['references'][pfamily]['GENOME'],snpsites=config['references'][pfamily]['SNPSITES'],rname="CombineVariants"
shell: "mkdir -p merged_somatic_variants; module load GATK/3.8-0; java -Xmx48g -Djava.io.tmpdir=/lscratch/$SLURM_JOBID -jar $GATK_JAR -T CombineVariants -R {params.genome} -nt 8 --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED --genotypemergeoption PRIORITIZE --rod_priority_list mutect2,mutect,strelka -o {output.mergedvcf} --variant:mutect {input.mutect} --variant:strelka {input.strelka} --variant:mutect2 {input.mutect2}"
vardict=config['project']['workpath']+"/vardict_out/{x}.FINAL.vcf",
output: mergedvcf=config['project']['workpath']+"/merged_somatic_variants/{x}.merged.vcf",
csvstats=config['project']['workpath']+"/merged_somatic_variants/{x}.mutect.stats.csv",
htmlstats=config['project']['workpath']+"/merged_somatic_variants/{x}.mutect.stats.html",
out=config['project']['workpath']+"/merged_somatic_variants/{x}.snpeff.out",
params: gres="lscratch:100",gatk=config['bin'][pfamily]['GATK'],genome=config['references'][pfamily]['GENOME'],snpsites=config['references'][pfamily]['SNPSITES'],snpeffgenome=config['references'][pfamily]['SNPEFF_GENOME'],snpeff=config['bin'][pfamily]['SNPEFF'],effconfig=config['references'][pfamily]['SNPEFF_CONFIG'],rname="CombineVariants"
shell: "mkdir -p merged_somatic_variants; module load GATK/3.8-0; java -Xmx48g -Djava.io.tmpdir=/lscratch/$SLURM_JOBID -jar $GATK_JAR -T CombineVariants -R {params.genome} -nt 8 --filteredrecordsmergetype KEEP_IF_ANY_UNFILTERED --genotypemergeoption PRIORITIZE --rod_priority_list mutect2,mutect,strelka,vardict -o {output.mergedvcf} --variant:mutect {input.mutect} --variant:strelka {input.strelka} --variant:mutect2 {input.mutect2} --variant:vardict {input.vardict}; module load snpEff/4.3t; java -Xmx12g -jar $SNPEFF_JAR -v {params.snpeffgenome} -c {params.effconfig} -cancer -canon -csvStats {output.csvstats} -stats {output.htmlstats} -cancerSamples pairs {output.mergedvcf} > {output.out}"
3 changes: 2 additions & 1 deletion Rules/mkdir_somatic.rl
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ rule mkdir_somatic:
mutect2_dir=config['project']['workpath']+"/mutect2_out",
strelka_dir=config['project']['workpath']+"/strelka_out",
mutect_dir=config['project']['workpath']+"/mutect_out",
vardict_dir=config['project']['workpath']+"/vardict_out",
merged_dir=config['project']['workpath']+"/merged_somatic_variants",
params: rname="pl:mkdir"
shell: "echo \'decoy\' > {output}; mkdir -p strelka_out; mkdir -p strelka_out/oncotator_out; mkdir -p strelka_out/mutsigCV_out; mkdir -p mutect_out; mkdir -p mutect_out/oncotator_out; mkdir -p mutect_out/mutsigCV_out; mkdir -p conpair_out; mkdir -p germline_vcfs; mkdir -p mutect2_out; mkdir -p mutect2_out/oncotator_out; mkdir -p mutect2_out/mutsigCV_out; mkdir -p mutect2_out/chrom_files; mkdir -p manta_out; mkdir -p merged_somatic_variants; mkdir -p merged_somatic_variants/oncotator_out; mkdir -p merged_somatic_variants/mutsigCV_out"
shell: "echo \'decoy\' > {output}; mkdir -p strelka_out; mkdir -p strelka_out/oncotator_out; mkdir -p strelka_out/mutsigCV_out; mkdir -p mutect_out; mkdir -p mutect_out/oncotator_out; mkdir -p mutect_out/mutsigCV_out; mkdir -p conpair_out; mkdir -p germline_vcfs; mkdir -p mutect2_out; mkdir -p mutect2_out/oncotator_out; mkdir -p mutect2_out/mutsigCV_out; mkdir -p mutect2_out/chrom_files; mkdir -p manta_out; mkdir -p merged_somatic_variants; mkdir -p merged_somatic_variants/oncotator_out; mkdir -p merged_somatic_variants/mutsigCV_out; mkdir -p vardict_out; mkdir -p vardict_out/oncotator_out; mkdir -p vardict_out/mutsigCV_out"
3 changes: 1 addition & 2 deletions Rules/mutect.rl
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
rule mutect:
input: normal=lambda wildcards: config['project']['pairs'][wildcards.x][0]+".recal.bam",
tumor=lambda wildcards: config['project']['pairs'][wildcards.x][1]+".recal.bam",
targets=ancient("exome_targets.bed"),
normalbai=lambda wildcards: config['project']['pairs'][wildcards.x][0]+".recal.bam.bai",
tumorbai=lambda wildcards: config['project']['pairs'][wildcards.x][1]+".recal.bam.bai"
output: vcf=temp(config['project']['workpath']+"/mutect_out/{x}.vcf"),
Expand All @@ -11,4 +10,4 @@ rule mutect:
htmlstats=config['project']['workpath']+"/mutect_out/{x}.mutect.stats.html",
out=config['project']['workpath']+"/mutect_out/{x}.snpeff.out"
params: normalsample=lambda wildcards: config['project']['pairs'][wildcards.x][0],tumorsample=lambda wildcards: config['project']['pairs'][wildcards.x][1],targets="exome_targets.bed",knowns=config['references'][pfamily]['MUTECTVARIANTS'],mutect=config['bin'][pfamily]['MUTECT'],gatk=config['bin'][pfamily]['GATK'],genome=config['references'][pfamily]['MUTECTGENOME'],cosmic=config['references'][pfamily]['MUTECTCOSMIC'],snp=config['references'][pfamily]['MUTECTSNP'],snpeffgenome=config['references'][pfamily]['SNPEFF_GENOME'],snpeff=config['bin'][pfamily]['SNPEFF'],effconfig=config['references'][pfamily]['SNPEFF_CONFIG'],rname="pl:mutect"
shell: "module load muTect/1.1.7; muTect --analysis_type MuTect --reference_sequence {params.genome} --vcf {output.vcf} {params.knowns} --intervals {params.targets} --disable_auto_index_creation_and_locking_when_reading_rods --input_file:normal {input.normal} --input_file:tumor {input.tumor} --out {output.stats} -rf BadCigar; module load GATK/3.8-0; module load java/1.8.0_92; GATK -m 48G SelectVariants -R {params.genome} --variant {output.vcf} --excludeFiltered -o {output.vcfRename}; module load snpEff/4.3t; java -Xmx12g -jar $SNPEFF_JAR -v {params.snpeffgenome} -c {params.effconfig} -interval {params.targets} -cancer -canon -csvStats {output.csvstats} -stats {output.htmlstats} -cancerSamples pairs {output.vcfRename} > {output.out}"
shell: "module load muTect/1.1.7; muTect --analysis_type MuTect --reference_sequence {params.genome} --vcf {output.vcf} {params.knowns} --disable_auto_index_creation_and_locking_when_reading_rods --input_file:normal {input.normal} --input_file:tumor {input.tumor} --out {output.stats} -rf BadCigar; module load GATK/3.8-0; module load java/1.8.0_92; GATK -m 48G SelectVariants -R {params.genome} --variant {output.vcf} --excludeFiltered -o {output.vcfRename}; module load snpEff/4.3t; java -Xmx12g -jar $SNPEFF_JAR -v {params.snpeffgenome} -c {params.effconfig} -interval {params.targets} -cancer -canon -csvStats {output.csvstats} -stats {output.htmlstats} -cancerSamples pairs {output.vcfRename} > {output.out}"
2 changes: 1 addition & 1 deletion Rules/sequenza.rl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
rule sequenza:
input: freeccnvs="freec_out/pass1/{x}.recal.bam_CNVs",
input: freeccnvs="freec_out/pass1/{x}.recal.bam_CNVs.p.value.txt",
output: fit="sequenza_out/{x}"+"_alternative_solutions.txt",
params: dir=config['project']['workpath'],tumorsample=lambda wildcards: config['project']['pairs'][wildcards.x][1],normalsample=lambda wildcards: config['project']['pairs'][wildcards.x][0],gc=config['references'][pfamily]['SEQUENZAGC'],rname="pl:sequenza"
threads: 8
Expand Down
Loading

0 comments on commit 0cb0262

Please sign in to comment.