Skip to content
This repository has been archived by the owner on Oct 23, 2023. It is now read-only.

Commit

Permalink
Merge pull request #305 from jlac/master
Browse files Browse the repository at this point in the history
added population allele frequency filtering to MAF and set 10-samples threshold for mutsig to run
  • Loading branch information
jlac committed Dec 14, 2017
2 parents 35e747f + c2f0cd7 commit 6b1c988
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 115 deletions.
73 changes: 34 additions & 39 deletions Results-template/Scripts/prep_mafs.pl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
#INPUT

my $finalmaf = $ARGV[1] . '_out/oncotator_out/' . $ARGV[1] . '_merged.maf'; #to fix...
my $filtmaf = $ARGV[1] . '_out/oncotator_out/' . $ARGV[1] . '_mergedFiltered.maf'; #to fix...
open C, ">$finalmaf";
open D, ">$filtmaf";
#print C "Chromosome\tRead_1_Start\tRead_2_Start\tHighCounts\tLowCounts\tHomozygous_informative\tHeterozygous_informative\tComps\tHighProp\tLowProp\tWinner\n";

my $maffile = $ARGV[0]; #to fix...
Expand Down Expand Up @@ -78,8 +80,6 @@
}
}

#if ($ARGV[1] eq 'mutect2'){

print C "$header\ttumor_freq\tNonsilent_gene_mutation_count\n";

my $a = 0;
Expand All @@ -95,44 +95,39 @@
}
print C "$variants[$a]\t$freq[$a+1]\t$genecount\n";
}
#}

#elsif ($ARGV[1] eq 'mutect'){

#print C "$header\ttumor_freq\tNonsilent_gene_mutation_count\n";
close C;
close G;

#my $a = 0;
#for ($a = 0; $a < @variants; $a++) {
# my $d=0;
# my $genecount=0;
# for ($d=0; $d<@hugoid; $d++) {
# @line=split "\t", $variants[$a];
# if ($hugoid[$d] eq $line[0]) {
# $genecount=$count[$d];
# last;
# }
# }
# print C "$variants[$a]\t$freq[$a+1]\t$genecount\n";
#}
#}
my @columns=(split "\t",$header);
my $col='';

#else {
my $z=0;
for ($z=0; $z<@columns; $z++)
if ($columns[$z] eq 'ExAC_AF') {
$col=$z;
last;
}
}

#print C "$header\tNonsilent_gene_mutation_count\n";
open F, "<$finalmaf";
while (<F>){
chomp;
last if m/^$/;
@line = split "\t", $_;
if ($line[0] eq 'Hugo_Symbol') {
print D "$line[0]\n";
}
else {
if ($line[$z] eq '-') {
print D "$line[0]\n";
}
else {
if ($line[$z] <= 0.001) {
print D "$line[0]\n";
}
}
}
}

#my $a = 0;
#for ($a = 0; $a < @variants; $a++) {
# my $d=0;
# my $genecount=0;
# for ($d=0; $d<@hugoid; $d++) {
# @line=split "\t", $variants[$a];
# if ($hugoid[$d] eq $line[0]) {
# $genecount=$count[$d];
# last;
# }
# }
# print C "$variants[$a]\t$genecount\n";
#}
#}
close C;
close G;
close D;
close F;
6 changes: 3 additions & 3 deletions Rules/database_somatic.rl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
rule database_somatic:
input: mutect2maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_merged.maf",
mutectmaf=config['project']['workpath']+"/mutect_out/oncotator_out/mutect_merged.maf",
strelkamaf=config['project']['workpath']+"/strelka_out/oncotator_out/strelka_merged.maf",
input: mutect2maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
mutectmaf=config['project']['workpath']+"/mutect_out/oncotator_out/mutect_filtered.maf",
strelkamaf=config['project']['workpath']+"/strelka_out/oncotator_out/strelka_filtered.maf",
output: mutect2dbase=config['project']['workpath']+"/mutect2_out/mutect2_variants.database",
mutectdbase=config['project']['workpath']+"/mutect_out/mutect_variants.database",
strelkadbase=config['project']['workpath']+"/strelka_out/strelka_variants.database",
Expand Down
2 changes: 1 addition & 1 deletion Rules/database_somatic_tumoronly.rl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
rule database_somatic_tumoronly:
input: mutect2maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_merged.maf",
input: mutect2maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
output: mutect2dbase=config['project']['workpath']+"/mutect2_out/mutect2_variants.database",
params: rname="pl:database"
shell: "cd mutect2_out; sed '1!b;s/#//g' {input.mutect2maf} > temp.database; module load R/3.4.0_gcc-6.2.0; Rscript ../Scripts/dedupcol.R; echo -n \"create table vcf (\\\"\" > sqlite3.commands; sed -n 1p temp.database | sed 's/#//g' | sed 's/\\t/\" text, \"/g' | sed '${{s/$/\\\" text);/}}' >> sqlite3.commands; echo \".separator \\\"\\\\t\\\"\" >> sqlite3.commands; echo \".import temp.database vcf\" >> sqlite3.commands; tail -n +2 temp.database > temp.temp.database; mv temp.temp.database temp.database; sqlite3 {output.mutect2dbase} < sqlite3.commands"
2 changes: 1 addition & 1 deletion Rules/gatk.genotype.somatic.gvcfs.rl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ rule gatk_genotype_somatic_gvcfs:
run:
fl=os.popen("ls x*.gvcf").read().split()
var=" --variant "+" --variant ".join(fl)
cmd="module load GATK/3.6; GATK -m 120G GenotypeGVCFs -R {params.genome} --annotation InbreedingCoeff --annotation FisherStrand --annotation QualByDepth --annotation ChromosomeCounts --dbsnp {params.snpsites} -o {output} -nt {threads}"+var
cmd="module load GATK/3.6; GATK -m 96G GenotypeGVCFs -R {params.genome} --annotation InbreedingCoeff --annotation FisherStrand --annotation QualByDepth --annotation ChromosomeCounts --dbsnp {params.snpsites} -o {output} -nt {threads}"+var
print(cmd)
shell(cmd)
17 changes: 10 additions & 7 deletions Rules/maftools.rl
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
rule maftools:
input: mutect2=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_variants.maf",
mutect=config['project']['workpath']+"/mutect_out/oncotator_out/mutect_variants.maf",
strelka=config['project']['workpath']+"/strelka_out/oncotator_out/strelka_variants.maf",
output: mutect2maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_merged.maf",
mutectmaf=config['project']['workpath']+"/mutect_out/oncotator_out/mutect_merged.maf",
strelkamaf=config['project']['workpath']+"/strelka_out/oncotator_out/strelka_merged.maf",
input: expand(config['project']['workpath']+"/mutect2_out/oncotator_out/{x}.maf",x=pairs),
expand(config['project']['workpath']+"/strelka_out/oncotator_out/{x}.maf",x=pairs),
expand(config['project']['workpath']+"/mutect_out/oncotator_out/{x}.maf",x=pairs),
output: mutectpre=config['project']['workpath']+"/mutect_out/oncotator_out/mutect_variants.maf",
mutect2pre=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_variants.maf",
strelkapre=config['project']['workpath']+"/strelka_out/oncotator_out/strelka_variants.maf",
mutect2maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
mutectmaf=config['project']['workpath']+"/mutect_out/oncotator_out/mutect_filtered.maf",
strelkamaf=config['project']['workpath']+"/strelka_out/oncotator_out/strelka_filtered.maf",
mutect2summary=config['project']['workpath']+"/mutect2_out/mutect2_maf_summary.pdf",
mutect2oncoprint=config['project']['workpath']+"/mutect2_out/mutect2_oncoplot.pdf",
mutectsummary=config['project']['workpath']+"/mutect_out/mutect_maf_summary.pdf",
mutectoncoprint=config['project']['workpath']+"/mutect_out/mutect_oncoplot.pdf",
strelkasummary=config['project']['workpath']+"/strelka_out/strelka_maf_summary.pdf",
strelkaoncoprint=config['project']['workpath']+"/strelka_out/strelka_oncoplot.pdf",
params: dir=config['project']['workpath'],rname="pl:maftools"
shell: "perl Scripts/prep_mafs.pl {input.mutect2} mutect2; perl Scripts/prep_mafs.pl {input.mutect} mutect; perl Scripts/prep_mafs.pl {input.strelka} strelka; module load R/3.4.0_gcc-6.2.0; Rscript Scripts/maftools.R {params.dir}/mutect2_out/oncotator_out/ mutect2_merged.maf {params.dir}/mutect2_out/mutect2_maf_summary {output.mutect2oncoprint}; Rscript Scripts/maftools.R {params.dir}/mutect_out/oncotator_out/ mutect_merged.maf {params.dir}/mutect_out/mutect_maf_summary {output.mutectoncoprint}; Rscript Scripts/maftools.R {params.dir}/strelka_out/oncotator_out/ strelka_merged.maf {params.dir}/strelka_out/strelka_maf_summary {output.strelkaoncoprint}"
shell: "cat mutect2_out/oncotator_out/*.maf > mutect2_out/oncotator_out/mutect2_variants.maf; cat mutect_out/oncotator_out/*.maf > mutect_out/oncotator_out/mutect_variants.maf; cat strelka_out/oncotator_out/*.maf > strelka_out/oncotator_out/strelka_variants.maf; perl Scripts/prep_mafs.pl mutect2_out/oncotator_out/mutect2_variants.maf mutect2; perl Scripts/prep_mafs.pl mutect_out/oncotator_out/mutect_variants.maf mutect; perl Scripts/prep_mafs.pl strelka_out/oncotator_out/strelka_variants.maf strelka; module load R/3.4.0_gcc-6.2.0; Rscript Scripts/maftools.R {params.dir}/mutect2_out/oncotator_out/ mutect2_filtered.maf {params.dir}/mutect2_out/mutect2_maf_summary {output.mutect2oncoprint}; Rscript Scripts/maftools.R {params.dir}/mutect_out/oncotator_out/ mutect_filtered.maf {params.dir}/mutect_out/mutect_maf_summary {output.mutectoncoprint}; Rscript Scripts/maftools.R {params.dir}/strelka_out/oncotator_out/ strelka_filtered.maf {params.dir}/strelka_out/strelka_maf_summary {output.strelkaoncoprint}"
6 changes: 3 additions & 3 deletions Rules/maftools_tumoronly.rl
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
rule maftools_tumoronly:
input: mutect2=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_variants.maf",
output: mutect2maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_merged.maf",
input: mutect2mafs=expand(config['project']['workpath']+"/mutect2_out/oncotator_out/{x}.maf"),
output: mutect2maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
mutect2summary=config['project']['workpath']+"/mutect2_out/mutect2_maf_summary.pdf",
mutect2oncoprint=config['project']['workpath']+"/mutect2_out/mutect2_oncoplot.pdf",
params: dir=config['project']['workpath'],rname="pl:maftools"
shell: "perl Scripts/prep_mafs.pl {input.mutect2} mutect2; module load R/3.4.0_gcc-6.2.0; Rscript Scripts/maftools.R {params.dir}/mutect2_out/oncotator_out/ mutect2_merged.maf {params.dir}/mutect2_out/mutect2_maf_summary {output.mutect2oncoprint}"
shell: "cat mutect2_out/oncotator_out/*.maf > mutect2_out/oncotator_out/mutect2_variants.maf; perl Scripts/prep_mafs.pl mutect2_out/oncotator_out/mutect2_variants.maf mutect2; module load R/3.4.0_gcc-6.2.0; Rscript Scripts/maftools.R {params.dir}/mutect2_out/oncotator_out/ mutect2_filtered.maf {params.dir}/mutect2_out/mutect2_maf_summary {output.mutect2oncoprint}"
37 changes: 22 additions & 15 deletions Rules/mutsig_mutect.rl
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
if config['project']['annotation'] == "hg19":
rule mutsig_mutect:
input: expand(config['project']['workpath']+"/mutect_out/oncotator_out/{p}.maf",p=pairs)
num = sum(1 for line in open('pairs'))

if num > 10:
if config['project']['annotation'] == "hg19":
rule mutsig_mutect:
input: config['project']['workpath']+"/mutect_out/oncotator_out/mutect_filtered.maf"
output: genes=config['project']['workpath']+"/mutect_out/mutsigCV_out/somatic.sig_genes.txt",
params: rname="pl:mutsig_mutect"
shell: "module load MutSig; MutSigCV {input} $MUTSIG_REF/exome_full192.coverage.txt $MUTSIG_REF/gene.covariates.txt mutect_out/mutsigCV_out/somatic $MUTSIG_REF/mutation_type_dictionary_file.txt $MUTSIG_REF/chr_files_hg19"

elif config['project']['annotation'] == "hg38":
rule mutsig_mutect:
input: config['project']['workpath']+"/mutect_out/oncotator_out/mutect_filtered.maf"
output: genes=config['project']['workpath']+"/mutect_out/mutsigCV_out/somatic.sig_genes.txt",
maf=config['project']['workpath']+"/mutect_out/oncotator_out/mutect_variants.maf"
params: rname="pl:mutsig_mutect"
shell: "cat mutect_out/oncotator_out/*.maf > {output.maf}; module load MutSig; MutSigCV {output.maf} $MUTSIG_REF/exome_full192.coverage.txt $MUTSIG_REF/gene.covariates.txt mutect_out/mutsigCV_out/somatic $MUTSIG_REF/mutation_type_dictionary_file.txt $MUTSIG_REF/chr_files_hg19"
shell: "echo \"null\" > {output.genes}"

elif config['project']['annotation'] == "hg38":
rule mutsig_mutect:
input: expand(config['project']['workpath']+"/mutect_out/oncotator_out/{p}.maf",p=pairs)
elif config['project']['annotation'] == "mm10":
rule mutsig_mutect:
input: config['project']['workpath']+"/mutect_out/oncotator_out/mutect_filtered.maf"
output: genes=config['project']['workpath']+"/mutect_out/mutsigCV_out/somatic.sig_genes.txt",
maf=config['project']['workpath']+"/mutect_out/oncotator_out/mutect_variants.maf"
params: rname="pl:mutsig_mutect"
shell: "cat mutect_out/oncotator_out/*.maf > {output.maf}; echo \"null\" > {output.genes}"
shell: "echo \"null\" > {output.genes}"

elif config['project']['annotation'] == "mm10":
rule mutsig_mutect:
input: expand(config['project']['workpath']+"/mutect_out/oncotator_out/{p}.maf",p=pairs)
else:
rule mutsig_mutect:
input: config['project']['workpath']+"/mutect_out/oncotator_out/mutect_filtered.maf"
output: genes=config['project']['workpath']+"/mutect_out/mutsigCV_out/somatic.sig_genes.txt",
maf=config['project']['workpath']+"/mutect_out/oncotator_out/mutect_variants.maf"
params: rname="pl:mutsig_mutect"
shell: "cat mutect_out/oncotator_out/*.maf > {output.maf}; echo \"null\" > {output.genes}"
shell: "echo \"null\" > {output.genes}"
37 changes: 22 additions & 15 deletions Rules/mutsig_mutect2.rl
Original file line number Diff line number Diff line change
@@ -1,23 +1,30 @@
if config['project']['annotation'] == "hg19":
rule mutsig_mutect2:
input: expand(config['project']['workpath']+"/mutect2_out/oncotator_out/{p}.maf",p=pairs)
num = sum(1 for line in open('pairs'))

if num > 10:
if config['project']['annotation'] == "hg19":
rule mutsig_mutect2:
input: config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
output: genes=config['project']['workpath']+"/mutect2_out/mutsigCV_out/somatic.sig_genes.txt",
maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_variants.maf"
params: rname="pl:mutsig_mutect2"
shell: "cat mutect2_out/oncotator_out/*.maf > {output.maf}; module load MutSig; MutSigCV {output.maf} $MUTSIG_REF/exome_full192.coverage.txt $MUTSIG_REF/gene.covariates.txt mutect2_out/mutsigCV_out/somatic $MUTSIG_REF/mutation_type_dictionary_file.txt $MUTSIG_REF/chr_files_hg19"
shell: "module load MutSig; MutSigCV {input} $MUTSIG_REF/exome_full192.coverage.txt $MUTSIG_REF/gene.covariates.txt mutect2_out/mutsigCV_out/somatic $MUTSIG_REF/mutation_type_dictionary_file.txt $MUTSIG_REF/chr_files_hg19"

elif config['project']['annotation'] == "hg38":
rule mutsig_mutect2:
input: expand(config['project']['workpath']+"/mutect2_out/oncotator_out/{p}.maf",p=pairs)
elif config['project']['annotation'] == "hg38":
rule mutsig_mutect2:
input: config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
output: genes=config['project']['workpath']+"/mutect2_out/mutsigCV_out/somatic.sig_genes.txt",
params: rname="pl:mutsig_mutect2"
shell: "echo \"null\" > {output.genes}"

elif config['project']['annotation'] == "mm10":
rule mutsig_mutect2:
input: config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
output: genes=config['project']['workpath']+"/mutect2_out/mutsigCV_out/somatic.sig_genes.txt",
maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_variants.maf"
params: rname="pl:mutsig_mutect2"
shell: "cat mutect2_out/oncotator_out/*.maf > {output.maf}; echo \"null\" > {output.genes}"
shell: "echo \"null\" > {output.genes}"

elif config['project']['annotation'] == "mm10":
rule mutsig_mutect2:
input: expand(config['project']['workpath']+"/mutect2_out/oncotator_out/{p}.maf",p=pairs)
else:
rule mutsig_mutect2:
input: config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
output: genes=config['project']['workpath']+"/mutect2_out/mutsigCV_out/somatic.sig_genes.txt",
maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_variants.maf"
params: rname="pl:mutsig_mutect2"
shell: "cat mutect2_out/oncotator_out/*.maf > {output.maf}; echo \"null\" > {output.genes}"
shell: "echo \"null\" > {output.genes}"
38 changes: 23 additions & 15 deletions Rules/mutsig_mutect2_tumoronly.rl
Original file line number Diff line number Diff line change
@@ -1,23 +1,31 @@
if config['project']['annotation'] == "hg19":
rule mutsig_mutect2_tumoronly:
input: expand(config['project']['workpath']+"/mutect2_out/oncotator_out/{s}.maf",s=samples)
import glob
samps = len(glob.glob1(myPath,"*.recal.bam"))

if samps>10:
if config['project']['annotation'] == "hg19":
rule mutsig_mutect2_tumoronly:
input: config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
output: genes=config['project']['workpath']+"/mutect2_out/mutsigCV_out/somatic.sig_genes.txt",
maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_variants.maf"
params: rname="pl:mutsig_mutect2"
shell: "cat mutect2_out/oncotator_out/*.maf > {output.maf}; module load MutSig; MutSigCV {output.maf} $MUTSIG_REF/exome_full192.coverage.txt $MUTSIG_REF/gene.covariates.txt mutect2_out/mutsigCV_out/somatic $MUTSIG_REF/mutation_type_dictionary_file.txt $MUTSIG_REF/chr_files_hg19"
shell: "module load MutSig; MutSigCV {input} $MUTSIG_REF/exome_full192.coverage.txt $MUTSIG_REF/gene.covariates.txt mutect2_out/mutsigCV_out/somatic $MUTSIG_REF/mutation_type_dictionary_file.txt $MUTSIG_REF/chr_files_hg19"

elif config['project']['annotation'] == "hg38":
rule mutsig_mutect2_tumoronly:
input: expand(config['project']['workpath']+"/mutect2_out/oncotator_out/{s}.maf",s=samples)
elif config['project']['annotation'] == "hg38":
rule mutsig_mutect2_tumoronly:
input: config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
output: genes=config['project']['workpath']+"/mutect2_out/mutsigCV_out/somatic.sig_genes.txt",
params: rname="pl:mutsig_mutect2"
shell: "echo \"null\" > {output.genes}"

elif config['project']['annotation'] == "mm10":
rule mutsig_mutect2_tumoronly:
input: config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
output: genes=config['project']['workpath']+"/mutect2_out/mutsigCV_out/somatic.sig_genes.txt",
maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_variants.maf"
params: rname="pl:mutsig_mutect2"
shell: "cat mutect2_out/oncotator_out/*.maf > {output.maf}; echo \"null\" > {output.genes}"
shell: "echo \"null\" > {output.genes}"

elif config['project']['annotation'] == "mm10":
rule mutsig_mutect2_tumoronly:
input: expand(config['project']['workpath']+"/mutect2_out/oncotator_out/{s}.maf",s=samples)
else:
rule mutsig_mutect2_tumoronly:
input: config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_filtered.maf",
output: genes=config['project']['workpath']+"/mutect2_out/mutsigCV_out/somatic.sig_genes.txt",
maf=config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_variants.maf"
params: rname="pl:mutsig_mutect2"
shell: "cat mutect2_out/oncotator_out/*.maf > {output.maf}; echo \"null\" > {output.genes}"
shell: "echo \"null\" > {output.genes}"
Loading

0 comments on commit 6b1c988

Please sign in to comment.