Skip to content
This repository has been archived by the owner on Oct 23, 2023. It is now read-only.

Commit

Permalink
completed multi-caller additions to tumor-only somatic pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
jlac committed Jun 3, 2019
1 parent f00d7cb commit dbf4d75
Show file tree
Hide file tree
Showing 10 changed files with 23 additions and 25 deletions.
2 changes: 1 addition & 1 deletion Results-template/Scripts/admixplot.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ admixture <- read.table("admixture_out/admixture_table.tsv",header = TRUE,row.na
admix2 <- as.matrix(admixture)
admix2<-t(admix2)
png('admixture_out/admixture_mqc.png',width = 1000)
barplot(admix2,legend.text=TRUE,las=2,cex.names = 0.5,main = "Admixture",axis.lty=25)
barplot(admix2,legend.text=TRUE,las=2,cex.names = 0.5,main = "Admixture",axis.lty=25,col=c("red","blue","yellow","pink","green"))
dev.off()
8 changes: 4 additions & 4 deletions Results-template/Scripts/prep_mafs.pl
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@
#my $mergedmaf = $ARGV[1] . '_out/oncotator_out/' . $ARGV[1] . '_merged.maf'; #to fix...
#open C, ">$mergedmaf";

my $finalmaf = $ARGV[1] . '_out/oncotator_out/final_filtered.maf'; #to fix...
my $finalmaf = $ARGV[1] . '/oncotator_out/final_filtered.maf'; #to fix...
open C, ">$finalmaf";

my $maffile = $ARGV[0]; #to fix...
my $fixedmaf = $ARGV[1] . '_out/oncotator_out/' . $ARGV[1] . '_variants_fixed.maf';
my $fixedmaf = $ARGV[1] . '/oncotator_out/variants_fixed.maf';
my @line = ();
my $header='null';
my @variants=();
Expand Down Expand Up @@ -41,8 +41,8 @@
elsif ($line[0] !~ m'#') {
next if ($line[0] =~ /TTN|MUC16|OBSCN|AHNAK2|SYNE1|FLG|MUC5B|DNAH17|PLEC|DST|SYNE2|NEB|HSPG2|LAMA5|AHNAK|HMCN1|USH2A|DNAH11|MACF1|MUC17|DNAH5|GPR98|FAT1|PKD1|MDN1|RNF213|RYR1|DNAH2|DNAH3|DNAH8|DNAH1|DNAH9|ABCA13|SRRM2|CUBN|SPTBN5|PKHD1|LRP2|FBN3|CDH23|DNAH10|FAT4|RYR3|PKHD1L1|FAT2|CSMD1|PCNT|COL6A3|FRAS1|FCGBP|RYR2|HYDIN|XIRP2|LAMA1/);
if (($line[44] eq '-') || ($line[44] < 2)) {
if (($line[41] > 2) && ($line[39] > 9)) {
if ((($line[123] eq '-') || ($line[123] < 0.001)) && (($line[76] eq '-') || ($line[76] < 0.01)) && (($line[99] eq '-') || ($line[99] < 0.001)) && (($line[41]/$line[39]) > 0.05)) {
if (($line[41] > 4) && ($line[39] > 20)) {
if ((($line[123] eq '-') || ($line[123] < 0.001)) && (($line[76] eq '-') || ($line[76] < 0.001)) && (($line[99] eq '-') || ($line[99] < 0.001)) && (($line[41]/$line[39]) > 0.05)) {
print C "$_\t";
if ($line[39] != 0){
$calc=($line[41]/$line[39]);
Expand Down
2 changes: 1 addition & 1 deletion Rules/admixture_germline.rl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ rule admixture_germline:
params: gatk=config['bin'][pfamily]['GATK'],ref=config['project']['annotation'],genome=config['references'][pfamily]['GENOME'],key=config['references'][pfamily]['ADMIXTUREKEY'],refcount=config['references'][pfamily]['ADMIXTUREREFS'],knowns=config['references'][pfamily]['KNOWNANCESTRY'],rname="admixture"
threads: 8
shell: """
mkdir -p admixture_out; module load vcftools; vcftools --vcf {input} --remove-indels --max-missing 1 --recode --recode-INFO-all --out admixture_out/samples_noINDEL_nomissing; module load GATK/3.5-0; GATK -m 48G CombineVariants -R {params.genome} --genotypemergeoption UNSORTED -o {output.mergedvcf} --variant {params.knowns} --variant {input} --minimumN 2 -nt 4; vcftools --vcf {output.mergedvcf} --maf 0.05 --remove-indels --plink --out admixture_out/samples_and_knowns_filtered; module load plink/1.9.0-beta4.4; plink --noweb --recode12 --out admixture_out/samples_and_knowns_filtered_recode --file admixture_out/samples_and_knowns_filtered; perl Scripts/admixture_prep.pl {params.key} admixture_out/samples_and_knowns_filtered_recode.pop admixture_out/samples_and_knowns_filtered_recode.ped; /data/CCBR_Pipeliner/db/PipeDB/bin/admixture_linux-1.3.0/admixture admixture_out/samples_and_knowns_filtered_recode.ped {params.refcount} --supervised -j32; mv samples_and_knowns_filtered_recode.{params.refcount}.P admixture_out/samples_and_knowns_filtered_recode.P; mv samples_and_knowns_filtered_recode.{params.refcount}.Q admixture_out/samples_and_knowns_filtered_recode.Q; perl Scripts/admixture_post.pl {params.key} {output.table} {output.admix} {params.ref} {output.recodeped}; module load R/3.5; Rscript Scripts/admixplot.R
mkdir -p admixture_out; module load vcftools; vcftools --vcf {input} --remove-indels --max-missing 1 --recode --recode-INFO-all --out admixture_out/samples_noINDEL_nomissing; module load GATK/3.5-0; GATK -m 48G CombineVariants -R {params.genome} --genotypemergeoption UNSORTED -o {output.mergedvcf} --variant {params.knowns} --variant admixture_out/samples_noINDEL_nomissing.recode.vcf --minimumN 2 -nt 4; vcftools --vcf {output.mergedvcf} --maf 0.05 --remove-indels --plink --out admixture_out/samples_and_knowns_filtered; module load plink/1.9.0-beta4.4; plink --noweb --recode12 --out admixture_out/samples_and_knowns_filtered_recode --file admixture_out/samples_and_knowns_filtered; perl Scripts/admixture_prep.pl {params.key} admixture_out/samples_and_knowns_filtered_recode.pop admixture_out/samples_and_knowns_filtered_recode.ped; /data/CCBR_Pipeliner/db/PipeDB/bin/admixture_linux-1.3.0/admixture admixture_out/samples_and_knowns_filtered_recode.ped {params.refcount} --supervised -j32; mv samples_and_knowns_filtered_recode.{params.refcount}.P admixture_out/samples_and_knowns_filtered_recode.P; mv samples_and_knowns_filtered_recode.{params.refcount}.Q admixture_out/samples_and_knowns_filtered_recode.Q; perl Scripts/admixture_post.pl {params.key} {output.table} {output.admix} {params.ref} {output.recodeped}; module load R/3.5; Rscript Scripts/admixplot.R

"""
2 changes: 1 addition & 1 deletion Rules/admixture_somatic.rl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ rule admixture_somatic:
params: gatk=config['bin'][pfamily]['GATK'],ref=config['project']['annotation'],regions="exome_targets.bed",genome=config['references'][pfamily]['GENOME'],key=config['references'][pfamily]['ADMIXTUREKEY'],refcount=config['references'][pfamily]['ADMIXTUREREFS'],knowns=config['references'][pfamily]['KNOWNANCESTRY'],rname="admixture"
threads: 8
shell: """
mkdir -p admixture_out; module load vcftools; vcftools --vcf {input} --remove-indels --max-missing 1 --recode --recode-INFO-all --out admixture_out/samples_noINDEL_nomissing; module load GATK/3.8-0; java -Xmx48g -Djava.io.tmpdir=/lscratch/$SLURM_JOBID -jar $GATK_JAR -T CombineVariants -R {params.genome} --genotypemergeoption UNSORTED -o {output.mergedvcf} --variant {params.knowns} --variant {input} -L {params.regions} --minimumN 2 -nt 1; vcftools --vcf {output.mergedvcf} --maf 0.05 --remove-indels --plink --out admixture_out/samples_and_knowns_filtered; module load plink/1.9.0-beta4.4; plink --noweb --recode12 --out admixture_out/samples_and_knowns_filtered_recode --file admixture_out/samples_and_knowns_filtered; perl Scripts/admixture_prep.pl {params.key} admixture_out/samples_and_knowns_filtered_recode.pop admixture_out/samples_and_knowns_filtered_recode.ped; /data/CCBR_Pipeliner/db/PipeDB/bin/admixture_linux-1.3.0/admixture admixture_out/samples_and_knowns_filtered_recode.ped {params.refcount} --supervised -j32; mv samples_and_knowns_filtered_recode.{params.refcount}.P admixture_out/samples_and_knowns_filtered_recode.P; mv samples_and_knowns_filtered_recode.{params.refcount}.Q admixture_out/samples_and_knowns_filtered_recode.Q; perl Scripts/admixture_post.pl {params.key} {output.table} {output.admix} {params.ref} {output.recodeped}; module load R/3.5; Rscript Scripts/admixplot.R
mkdir -p admixture_out; module load vcftools; vcftools --vcf {input} --remove-indels --max-missing 1 --recode --recode-INFO-all --out admixture_out/samples_noINDEL_nomissing; module load GATK/3.8-0; java -Xmx48g -Djava.io.tmpdir=/lscratch/$SLURM_JOBID -jar $GATK_JAR -T CombineVariants -R {params.genome} --genotypemergeoption UNSORTED -o {output.mergedvcf} --variant admixture_out/samples_noINDEL_nomissing.recode.vcf --variant {input} -L {params.regions} --minimumN 2 -nt 1; vcftools --vcf {output.mergedvcf} --maf 0.05 --remove-indels --plink --out admixture_out/samples_and_knowns_filtered; module load plink/1.9.0-beta4.4; plink --noweb --recode12 --out admixture_out/samples_and_knowns_filtered_recode --file admixture_out/samples_and_knowns_filtered; perl Scripts/admixture_prep.pl {params.key} admixture_out/samples_and_knowns_filtered_recode.pop admixture_out/samples_and_knowns_filtered_recode.ped; /data/CCBR_Pipeliner/db/PipeDB/bin/admixture_linux-1.3.0/admixture admixture_out/samples_and_knowns_filtered_recode.ped {params.refcount} --supervised -j32; mv samples_and_knowns_filtered_recode.{params.refcount}.P admixture_out/samples_and_knowns_filtered_recode.P; mv samples_and_knowns_filtered_recode.{params.refcount}.Q admixture_out/samples_and_knowns_filtered_recode.Q; perl Scripts/admixture_post.pl {params.key} {output.table} {output.admix} {params.ref} {output.recodeped}; module load R/3.5; Rscript Scripts/admixplot.R

"""
2 changes: 1 addition & 1 deletion Rules/admixture_wgs.rl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ rule admixture_wgs:
params: gatk=config['bin'][pfamily]['GATK'],ref=config['project']['annotation'],genome=config['references'][pfamily]['GENOME'],key=config['references'][pfamily]['ADMIXTUREKEY'],refcount=config['references'][pfamily]['ADMIXTUREREFS'],knowns=config['references'][pfamily]['KNOWNANCESTRY'],rname="admixture"
threads: 8
shell: """
mkdir -p admixture_out; module load vcftools; vcftools --vcf {input} --remove-indels --max-missing 1 --recode --recode-INFO-all --out admixture_out/samples_noINDEL_nomissing; module load GATK/3.5-0; GATK -m 48G CombineVariants -R {params.genome} --genotypemergeoption UNSORTED -o {output.mergedvcf} --variant {params.knowns} --variant {input} --minimumN 2 -nt 4; vcftools --vcf {output.mergedvcf} --maf 0.05 --remove-indels --plink --out admixture_out/samples_and_knowns_filtered; module load plink/1.9.0-beta4.4; plink --noweb --recode12 --out admixture_out/samples_and_knowns_filtered_recode --file admixture_out/samples_and_knowns_filtered; perl Scripts/admixture_prep.pl {params.key} admixture_out/samples_and_knowns_filtered_recode.pop admixture_out/samples_and_knowns_filtered_recode.ped; /data/CCBR_Pipeliner/db/PipeDB/bin/admixture_linux-1.3.0/admixture admixture_out/samples_and_knowns_filtered_recode.ped {params.refcount} --supervised -j32; mv samples_and_knowns_filtered_recode.{params.refcount}.P admixture_out/samples_and_knowns_filtered_recode.P; mv samples_and_knowns_filtered_recode.{params.refcount}.Q admixture_out/samples_and_knowns_filtered_recode.Q; perl Scripts/admixture_post.pl {params.key} {output.table} {output.admix} {params.ref} {output.recodeped}; module load R/3.5; Rscript Scripts/admixplot.R
mkdir -p admixture_out; module load vcftools; vcftools --vcf {input} --remove-indels --max-missing 1 --recode --recode-INFO-all --out admixture_out/samples_noINDEL_nomissing; module load GATK/3.5-0; GATK -m 48G CombineVariants -R {params.genome} --genotypemergeoption UNSORTED -o {output.mergedvcf} --variant {params.knowns} --variant admixture_out/samples_noINDEL_nomissing.recode.vcf --minimumN 2 -nt 4; vcftools --vcf {output.mergedvcf} --maf 0.05 --remove-indels --plink --out admixture_out/samples_and_knowns_filtered; module load plink/1.9.0-beta4.4; plink --noweb --recode12 --out admixture_out/samples_and_knowns_filtered_recode --file admixture_out/samples_and_knowns_filtered; perl Scripts/admixture_prep.pl {params.key} admixture_out/samples_and_knowns_filtered_recode.pop admixture_out/samples_and_knowns_filtered_recode.ped; /data/CCBR_Pipeliner/db/PipeDB/bin/admixture_linux-1.3.0/admixture admixture_out/samples_and_knowns_filtered_recode.ped {params.refcount} --supervised -j32; mv samples_and_knowns_filtered_recode.{params.refcount}.P admixture_out/samples_and_knowns_filtered_recode.P; mv samples_and_knowns_filtered_recode.{params.refcount}.Q admixture_out/samples_and_knowns_filtered_recode.Q; perl Scripts/admixture_post.pl {params.key} {output.table} {output.admix} {params.ref} {output.recodeped}; module load R/3.5; Rscript Scripts/admixplot.R

"""
2 changes: 1 addition & 1 deletion Rules/admixture_wgs_somatic.rl
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,6 @@ rule admixture_wgs_somatic:
params: gatk=config['bin'][pfamily]['GATK'],ref=config['project']['annotation'],genome=config['references'][pfamily]['GENOME'],key=config['references'][pfamily]['ADMIXTUREKEY'],refcount=config['references'][pfamily]['ADMIXTUREREFS'],knowns=config['references'][pfamily]['KNOWNANCESTRY'],rname="admixture"
threads: 8
shell: """
mkdir -p admixture_out; module load vcftools; vcftools --vcf {input} --remove-indels --max-missing 1 --recode --recode-INFO-all --out admixture_out/samples_noINDEL_nomissing; module load GATK/3.5-0; GATK -m 48G CombineVariants -R {params.genome} --genotypemergeoption UNSORTED -o {output.mergedvcf} --variant {params.knowns} --variant {input} --minimumN 2 -nt 4; vcftools --vcf {output.mergedvcf} --maf 0.05 --remove-indels --plink --out admixture_out/samples_and_knowns_filtered; module load plink/1.9.0-beta4.4; plink --noweb --recode12 --out admixture_out/samples_and_knowns_filtered_recode --file admixture_out/samples_and_knowns_filtered; perl Scripts/admixture_prep.pl {params.key} admixture_out/samples_and_knowns_filtered_recode.pop admixture_out/samples_and_knowns_filtered_recode.ped; /data/CCBR_Pipeliner/db/PipeDB/bin/admixture_linux-1.3.0/admixture admixture_out/samples_and_knowns_filtered_recode.ped {params.refcount} --supervised -j32; mv samples_and_knowns_filtered_recode.{params.refcount}.P admixture_out/samples_and_knowns_filtered_recode.P; mv samples_and_knowns_filtered_recode.{params.refcount}.Q admixture_out/samples_and_knowns_filtered_recode.Q; perl Scripts/admixture_post.pl {params.key} {output.table} {output.admix} {params.ref} {output.recodeped}; module load R/3.5; Rscript Scripts/admixplot.R
mkdir -p admixture_out; module load vcftools; vcftools --vcf {input} --remove-indels --max-missing 1 --recode --recode-INFO-all --out admixture_out/samples_noINDEL_nomissing; module load GATK/3.5-0; GATK -m 48G CombineVariants -R {params.genome} --genotypemergeoption UNSORTED -o {output.mergedvcf} --variant {params.knowns} --variant admixture_out/samples_noINDEL_nomissing.recode.vcf --minimumN 2 -nt 4; vcftools --vcf {output.mergedvcf} --maf 0.05 --remove-indels --plink --out admixture_out/samples_and_knowns_filtered; module load plink/1.9.0-beta4.4; plink --noweb --recode12 --out admixture_out/samples_and_knowns_filtered_recode --file admixture_out/samples_and_knowns_filtered; perl Scripts/admixture_prep.pl {params.key} admixture_out/samples_and_knowns_filtered_recode.pop admixture_out/samples_and_knowns_filtered_recode.ped; /data/CCBR_Pipeliner/db/PipeDB/bin/admixture_linux-1.3.0/admixture admixture_out/samples_and_knowns_filtered_recode.ped {params.refcount} --supervised -j32; mv samples_and_knowns_filtered_recode.{params.refcount}.P admixture_out/samples_and_knowns_filtered_recode.P; mv samples_and_knowns_filtered_recode.{params.refcount}.Q admixture_out/samples_and_knowns_filtered_recode.Q; perl Scripts/admixture_post.pl {params.key} {output.table} {output.admix} {params.ref} {output.recodeped}; module load R/3.5; Rscript Scripts/admixplot.R

"""
2 changes: 0 additions & 2 deletions Rules/all-wgs-somatic.rl
Original file line number Diff line number Diff line change
Expand Up @@ -107,8 +107,6 @@ elif config['project']['annotation'] == "mm10":
"admixture_out/admixture_table.tsv",
expand("svaba_out/{p}.log", p=pairs),
expand("canvas_out/{p}/tumor_CNV.vcf.gz", p=pairs),
expand("sequenza_out/{p}"+"_alternative_solutions.txt",p=pairs),
expand("freec_out/pass2/{p}"+".recal.bam_CNVs",p=pairs),
output:
params: rname="final"
shell: """
Expand Down
16 changes: 8 additions & 8 deletions Rules/maftools.rl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ rule maftools_mutect2:
summary=config['project']['workpath']+"/mutect2_out/mutect2_maf_summary.pdf",
oncoprint=config['project']['workpath']+"/mutect2_out/mutect2_oncoplot.pdf",
params: dir=config['project']['workpath'],rname="pl:maftools"
shell: "cat mutect2_out/oncotator_out/*.maf > mutect2_out/oncotator_out/mutect2_variants.maf; perl Scripts/prep_mafs.pl mutect2_out/oncotator_out/mutect2_variants.maf mutect2; module load R/3.5; Rscript Scripts/maftools.R {params.dir}/mutect2_out/oncotator_out/ final_filtered.maf {params.dir}/mutect2_out/mutect2_maf_summary.pdf {output.oncoprint}"
shell: "cat mutect2_out/oncotator_out/*.maf > mutect2_out/oncotator_out/mutect2_variants.maf; perl Scripts/prep_mafs.pl mutect2_out/oncotator_out/mutect2_variants.maf mutect2_out; module load R/3.5; Rscript Scripts/maftools.R {params.dir}/mutect2_out/oncotator_out/ final_filtered.maf {params.dir}/mutect2_out/mutect2_maf_summary.pdf {output.oncoprint}"

rule maftools_mutect:
input: expand(config['project']['workpath']+"/mutect_out/oncotator_out/{x}.maf",x=pairs),
Expand All @@ -14,7 +14,7 @@ rule maftools_mutect:
summary=config['project']['workpath']+"/mutect_out/mutect_maf_summary.pdf",
oncoprint=config['project']['workpath']+"/mutect_out/mutect_oncoplot.pdf",
params: dir=config['project']['workpath'],rname="pl:maftools"
shell: "cat mutect_out/oncotator_out/*.maf > mutect_out/oncotator_out/mutect_variants.maf; perl Scripts/prep_mafs.pl mutect_out/oncotator_out/mutect_variants.maf mutect; module load R/3.5; Rscript Scripts/maftools.R {params.dir}/mutect_out/oncotator_out/ final_filtered.maf {params.dir}/mutect_out/mutect_maf_summary.pdf {output.oncoprint}"
shell: "cat mutect_out/oncotator_out/*.maf > mutect_out/oncotator_out/mutect_variants.maf; perl Scripts/prep_mafs.pl mutect_out/oncotator_out/mutect_variants.maf mutect_out; module load R/3.5; Rscript Scripts/maftools.R {params.dir}/mutect_out/oncotator_out/ final_filtered.maf {params.dir}/mutect_out/mutect_maf_summary.pdf {output.oncoprint}"

rule maftools_strelka:
input: expand(config['project']['workpath']+"/strelka_out/oncotator_out/{x}.maf",x=pairs),
Expand All @@ -23,13 +23,13 @@ rule maftools_strelka:
summary=config['project']['workpath']+"/strelka_out/strelka_maf_summary.pdf",
oncoprint=config['project']['workpath']+"/strelka_out/strelka_oncoplot.pdf",
params: dir=config['project']['workpath'],rname="pl:maftools"
shell: "cat strelka_out/oncotator_out/*.maf > strelka_out/oncotator_out/strelka_variants.maf; perl Scripts/prep_mafs.pl strelka_out/oncotator_out/strelka_variants.maf strelka; module load R/3.5; Rscript Scripts/maftools.R {params.dir}/strelka_out/oncotator_out/ final_filtered.maf {params.dir}/strelka_out/strelka_maf_summary.pdf {output.oncoprint}"
shell: "cat strelka_out/oncotator_out/*.maf > strelka_out/oncotator_out/strelka_variants.maf; perl Scripts/prep_mafs.pl strelka_out/oncotator_out/strelka_variants.maf strelka_out; module load R/3.5; Rscript Scripts/maftools.R {params.dir}/strelka_out/oncotator_out/ final_filtered.maf {params.dir}/strelka_out/strelka_maf_summary.pdf {output.oncoprint}"

# Rule maftools_merged: concatenates per-pair MAF files, filters them with
# Scripts/prep_mafs.pl, and runs Scripts/maftools.R to produce a summary PDF
# and an oncoplot PDF.
# NOTE(review): this block is shown as a diff without +/- markers; the first
# input/output group (mutect2_out paths) appears to be the pre-commit version
# and the second group (merged_somatic_variants paths) the post-commit version.
rule maftools_merged:
input: expand(config['project']['workpath']+"/mutect2_out/oncotator_out/{x}.maf",x=pairs),
output: pre=temp(config['project']['workpath']+"/mutect2_out/oncotator_out/mutect2_variants.maf"),
fin=config['project']['workpath']+"/mutect2_out/oncotator_out/final_filtered.maf",
summary=config['project']['workpath']+"/mutect2_out/mutect2_maf_summary.pdf",
oncoprint=config['project']['workpath']+"/mutect2_out/mutect2_oncoplot.pdf",
input: expand(config['project']['workpath']+"/merged_somatic_variants/oncotator_out/{x}.maf",x=pairs),
output: pre=temp(config['project']['workpath']+"/merged_somatic_variants/oncotator_out/merged_variants.maf"),
fin=config['project']['workpath']+"/merged_somatic_variants/oncotator_out/final_filtered.maf",
summary=config['project']['workpath']+"/merged_somatic_variants/merged_maf_summary.pdf",
oncoprint=config['project']['workpath']+"/merged_somatic_variants/merged_oncoplot.pdf",
params: dir=config['project']['workpath'],rname="pl:maftools"
# NOTE(review): the shell command below still reads from and writes to
# mutect2_out/oncotator_out/ and writes the summary PDF under mutect2_out/,
# while the merged_somatic_variants input/output paths above are not referenced
# (only {output.oncoprint} is taken from the declared outputs). It also passes
# "mutect2" as the second argument to prep_mafs.pl, whereas the other maftools
# rules in this commit pass "<caller>_out" and prep_mafs.pl now builds its
# output path as "<arg>/oncotator_out/final_filtered.maf". Presumably these
# should be merged_somatic_variants paths -- TODO confirm and update.
shell: "cat mutect2_out/oncotator_out/*.maf > mutect2_out/oncotator_out/mutect2_variants.maf; perl Scripts/prep_mafs.pl mutect2_out/oncotator_out/mutect2_variants.maf mutect2; module load R/3.5; Rscript Scripts/maftools.R {params.dir}/mutect2_out/oncotator_out/ final_filtered.maf {params.dir}/mutect2_out/mutect2_maf_summary.pdf {output.oncoprint}"
Loading

0 comments on commit dbf4d75

Please sign in to comment.