Skip to content
This repository has been archived by the owner on Oct 23, 2023. It is now read-only.

Commit

Permalink
Merge pull request #426 from jlac/activeDev
Browse files Browse the repository at this point in the history
Added merged output for FusionInspector and Oncofuse across STARfusion and FusionCatcher
  • Loading branch information
jlac authored Aug 30, 2019
2 parents fa276d4 + e656192 commit eb80395
Show file tree
Hide file tree
Showing 21 changed files with 119 additions and 57 deletions.
2 changes: 1 addition & 1 deletion Results-template/Scripts/make_sample_network.pl
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
my $cmd = '';
my $vcf=shift;

$cmd = 'module load vcftools; vcftools --vcf ' . $vcf . ' --plink --remove-indels --out plink';
$cmd = 'module load vcftools; vcftools --gzvcf ' . $vcf . ' --plink --remove-indels --out plink';
system($cmd);
$cmd = 'module load plink/1.9.0-beta4.4; plink --file plink --distance-matrix --out distance';
system($cmd);
Expand Down
4 changes: 2 additions & 2 deletions Rules/admixture_germline.rl
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
rule admixture_germline:
input: "exome.strictFilter.vcf"
input: "exome.strictFilter.vcf.gz"
output: vcf=temp("admixture_out/samples_noINDEL_nomissing.recode.vcf"),
mergedvcf=temp("admixture_out/samples_and_knowns.vcf"),
ped=temp("admixture_out/samples_and_knowns_filtered.ped"),
Expand All @@ -11,6 +11,6 @@ rule admixture_germline:
params: gatk=config['bin'][pfamily]['GATK'],ref=config['project']['annotation'],genome=config['references'][pfamily]['GENOME'],key=config['references'][pfamily]['ADMIXTUREKEY'],refcount=config['references'][pfamily]['ADMIXTUREREFS'],knowns=config['references'][pfamily]['KNOWNANCESTRY'],rname="admixture"
threads: 8
shell: """
mkdir -p admixture_out; module load vcftools; vcftools --vcf {input} --remove-indels --max-missing 1 --recode --recode-INFO-all --out admixture_out/samples_noINDEL_nomissing; module load GATK/3.5-0; GATK -m 48G CombineVariants -R {params.genome} --genotypemergeoption UNSORTED -o {output.mergedvcf} --variant {params.knowns} --variant admixture_out/samples_noINDEL_nomissing.recode.vcf --minimumN 2 -nt 4; vcftools --vcf {output.mergedvcf} --maf 0.05 --remove-indels --plink --out admixture_out/samples_and_knowns_filtered; module load plink/1.9.0-beta4.4; plink --noweb --recode12 --out admixture_out/samples_and_knowns_filtered_recode --file admixture_out/samples_and_knowns_filtered; perl Scripts/admixture_prep.pl {params.key} admixture_out/samples_and_knowns_filtered_recode.pop admixture_out/samples_and_knowns_filtered_recode.ped; /data/CCBR_Pipeliner/db/PipeDB/bin/admixture_linux-1.3.0/admixture admixture_out/samples_and_knowns_filtered_recode.ped {params.refcount} --supervised -j32; mv samples_and_knowns_filtered_recode.{params.refcount}.P admixture_out/samples_and_knowns_filtered_recode.P; mv samples_and_knowns_filtered_recode.{params.refcount}.Q admixture_out/samples_and_knowns_filtered_recode.Q; perl Scripts/admixture_post.pl {params.key} {output.table} {output.admix} {params.ref} {output.recodeped}; module load R/3.5; Rscript Scripts/admixplot.R
mkdir -p admixture_out; module load vcftools; vcftools --gzvcf {input} --remove-indels --max-missing 1 --recode --recode-INFO-all --out admixture_out/samples_noINDEL_nomissing; module load GATK/3.5-0; GATK -m 48G CombineVariants -R {params.genome} --genotypemergeoption UNSORTED -o {output.mergedvcf} --variant {params.knowns} --variant admixture_out/samples_noINDEL_nomissing.recode.vcf --minimumN 2 -nt 4; vcftools --vcf {output.mergedvcf} --maf 0.05 --remove-indels --plink --out admixture_out/samples_and_knowns_filtered; module load plink/1.9.0-beta4.4; plink --noweb --recode12 --out admixture_out/samples_and_knowns_filtered_recode --file admixture_out/samples_and_knowns_filtered; perl Scripts/admixture_prep.pl {params.key} admixture_out/samples_and_knowns_filtered_recode.pop admixture_out/samples_and_knowns_filtered_recode.ped; /data/CCBR_Pipeliner/db/PipeDB/bin/admixture_linux-1.3.0/admixture admixture_out/samples_and_knowns_filtered_recode.ped {params.refcount} --supervised -j32; mv samples_and_knowns_filtered_recode.{params.refcount}.P admixture_out/samples_and_knowns_filtered_recode.P; mv samples_and_knowns_filtered_recode.{params.refcount}.Q admixture_out/samples_and_knowns_filtered_recode.Q; perl Scripts/admixture_post.pl {params.key} {output.table} {output.admix} {params.ref} {output.recodeped}; module load R/3.5; Rscript Scripts/admixplot.R

"""
7 changes: 2 additions & 5 deletions Rules/all-exomeseq-germline.rl
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
rule all_exomeseq_germline:
input: "combined.vcf",
"exome.recode.vcf",
config['project']['workpath']+"/full_annot.txt.zip",
# "variants.database",
input: "combined.vcf.gz",
"sample_network_mqc.png",
"exome.snpeff.vcf",
expand("sample_vcfs/{s}"+".stats.csv",s=samples),
"exome_targets.bed",
"exome.strictFilter.vcf",
"exome.strictFilter.vcf.gz",
"manta_out/results/variants/diploidSV.vcf.gz",
"admixture_out/admixture_table.tsv"
output:
Expand Down
36 changes: 21 additions & 15 deletions Rules/all-rnafusion.rl
Original file line number Diff line number Diff line change
Expand Up @@ -22,20 +22,22 @@ if config['project']['annotation'] == "hg19":
expand("starfusion/fusioninspector/{x}/{x}.fusion_predictions.final",x=samples),
expand("fusioncatcher/fusioninspector/{x}/{x}.fusion_predictions.final",x=samples),
expand("starfusion/{x}/star-fusion.fusion_predictions.tsv",x=samples),
expand("{name}.RnaSeqMetrics.txt",name=samples),
# expand("{name}.RnaSeqMetrics.txt",name=samples),
# "expression/RawCountFile_genes_filtered.txt",
# "expression/RawCountFile_junctions_filtered.txt",
# "expression/RawCountFile_genejunctions_filtered.txt",
# expand("expression/{name}.star.count.overlap.txt",name=samples),
# "expression/RawCountFileOverlap.txt",
# "expression/RawCountFileStar.txt",
expand("{name}.rsem.genes.results",name=samples),
"RawCountFile_RSEM_genes_filtered.txt",
expand("QC/{x}_readlength.txt",x=samples),
# expand("{name}.rsem.genes.results",name=samples),
# "RawCountFile_RSEM_genes_filtered.txt",
# expand("QC/{x}_readlength.txt",x=samples),
expand("fusioninspector/{x}/{x}.fusion_predictions.final",x=samples),
expand("oncofuse/{x}/{x}.oncofuse.output",x=samples)
output:
params: rname="final"
shell: """
Scripts/fusionSummary.sh; module load multiqc/1.4; multiqc -f .; rm *featureCounts; mv *.out slurmfiles/; perl Scripts/summarize_usage.pl
Scripts/fusionSummary.sh; module load multiqc/1.7; multiqc -f .; rm *featureCounts; mv *.out slurmfiles/; perl Scripts/summarize_usage.pl

"""

Expand Down Expand Up @@ -63,20 +65,22 @@ elif config['project']['annotation'] == "hg38":
expand("starfusion/fusioninspector/{x}/{x}.fusion_predictions.final",x=samples),
expand("fusioncatcher/fusioninspector/{x}/{x}.fusion_predictions.final",x=samples),
expand("starfusion/{x}/star-fusion.fusion_predictions.tsv",x=samples),
expand("{name}.RnaSeqMetrics.txt",name=samples),
# expand("{name}.RnaSeqMetrics.txt",name=samples),
# "expression/RawCountFile_genes_filtered.txt",
# "expression/RawCountFile_junctions_filtered.txt",
# "expression/RawCountFile_genejunctions_filtered.txt",
# expand("expression/{name}.star.count.overlap.txt",name=samples),
# "expression/RawCountFileOverlap.txt",
# "expression/RawCountFileStar.txt",
expand("{name}.rsem.genes.results",name=samples),
"RawCountFile_RSEM_genes_filtered.txt",
expand("QC/{x}_readlength.txt",x=samples),
# expand("{name}.rsem.genes.results",name=samples),
# "RawCountFile_RSEM_genes_filtered.txt",
# expand("QC/{x}_readlength.txt",x=samples),
expand("fusioninspector/{x}/{x}.fusion_predictions.final",x=samples),
expand("oncofuse/{x}/{x}.oncofuse.output",x=samples)
output:
params: rname="final"
shell: """
Scripts/fusionSummary.sh; module load multiqc/1.4; multiqc -f .; mv *.out slurmfiles/; perl Scripts/summarize_usage.pl
Scripts/fusionSummary.sh; module load multiqc/1.7; multiqc -f .; mv *.out slurmfiles/; perl Scripts/summarize_usage.pl

"""

Expand All @@ -100,19 +104,21 @@ elif config['project']['annotation'] == "mm10":
expand("starfusion/fusioninspector/{x}/{x}.fusion_predictions.final",x=samples),
expand("fusioncatcher/fusioninspector/{x}/{x}.fusion_predictions.final",x=samples),
expand("starfusion/{x}/star-fusion.fusion_predictions.tsv",x=samples),
expand("{name}.RnaSeqMetrics.txt",name=samples),
# expand("{name}.RnaSeqMetrics.txt",name=samples),
# "expression/RawCountFile_genes_filtered.txt",
# "expression/RawCountFile_junctions_filtered.txt",
# "expression/RawCountFile_genejunctions_filtered.txt",
# expand("expression/{name}.star.count.overlap.txt",name=samples),
# "expression/RawCountFileOverlap.txt",
# "expression/RawCountFileStar.txt",
expand("{name}.rsem.genes.results",name=samples),
"RawCountFile_RSEM_genes_filtered.txt",
expand("QC/{x}_readlength.txt",x=samples),
# expand("{name}.rsem.genes.results",name=samples),
# "RawCountFile_RSEM_genes_filtered.txt",
# expand("QC/{x}_readlength.txt",x=samples),
expand("fusioninspector/{x}/{x}.fusion_predictions.final",x=samples),
expand("oncofuse/{x}/{x}.oncofuse.output",x=samples)
output:
params: rname="final"
shell: """
Scripts/fusionSummary.sh; module load multiqc/1.4; multiqc -f .; mv *.out slurmfiles/; perl Scripts/summarize_usage.pl
Scripts/fusionSummary.sh; module load multiqc/1.7; multiqc -f .; mv *.out slurmfiles/; perl Scripts/summarize_usage.pl

"""
2 changes: 1 addition & 1 deletion Rules/fusioncatcher.rl
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@ rule fusioncatcher:
output: "fusioncatcher/{x}/final-list_candidate-fusion-genes.txt"
params: data=config['references'][pfamily]['FUSCATCHDAT'],configfile=config['references'][pfamily]['FUSCATCHDAT'],rname='fusioncatcher',sample="{x}"
threads: 16
shell: "module load fusioncatcher/1.00; mkdir -p fusioncatcher/{params.sample}; fusioncatcher -i {input.file1},{input.file2} -o fusioncatcher/{params.sample} -d {params.data} --threads {threads}"
shell: "module load fusioncatcher/1.10; mkdir -p fusioncatcher/{params.sample}; fusioncatcher -i {input.file1},{input.file2} -o fusioncatcher/{params.sample} --threads {threads}"
5 changes: 5 additions & 0 deletions Rules/fusioninsp.rl
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Validate fusion candidates for sample {x} with a single FusionInspector run
# over two candidate lists passed as one comma-separated --fusions argument:
#   * the STAR-Fusion predictions (starfusion/{x}/star-fusion.fusion_predictions.tsv)
#   * the FusionCatcher-derived list fusioncatcher/fusioninspector/{x}/{x}_fusionInspector.input
#     (declared as an output of rule fusioninsp_fuscatch)
# Reads come from the trimmed R1/R2 FASTQ files. Results are written under
# fusioninspector/{x}/ using {x} as the output prefix; the rule's declared
# output is {x}.fusion_predictions.final in that directory.
#
# threads is declared so the scheduler reserves the cores FusionInspector is
# told to use via --CPU, matching how rule fusioncatcher passes {threads}.
# NOTE(review): params.ref is not referenced by the shell command.
rule fusioninsp:
    input:
        starfusion="starfusion/{x}/star-fusion.fusion_predictions.tsv",
        fusioncatcher="fusioncatcher/fusioninspector/{x}/{x}_fusionInspector.input",
        file1="{x}.R1.trimmed.fastq.gz",
        file2="{x}.R2.trimmed.fastq.gz"
    output: "fusioninspector/{x}/{x}.fusion_predictions.final"
    params: rname='fusioninsp',sample="{x}",ref=config['project']['annotation'],starlib=config['references'][pfamily]['STARFUSIONLIB']
    threads: 16
    shell: "module load fusioninspector/1.1.0; module load STAR/2.7.0f; mkdir -p fusioninspector/{params.sample}; FusionInspector --fusions {input.starfusion},{input.fusioncatcher} --genome_lib {params.starlib} --left_fq {input.file1} --right_fq {input.file2} --out_dir fusioninspector/{params.sample} --out_prefix {params.sample} --prep_for_IGV --CPU {threads} --cleanup --annotate --examine_coding_effect"
4 changes: 2 additions & 2 deletions Rules/fusioninsp_fuscatch.rl
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
rule fusioninsp_fuscatch:
input: fusions="fusioncatcher/{x}/final-list_candidate-fusion-genes.txt",file1="{x}.R1.trimmed.fastq.gz",file2="{x}.R2.trimmed.fastq.gz"
output: "fusioncatcher/fusioninspector/{x}/{x}.fusion_predictions.final"
output: "fusioncatcher/fusioninspector/{x}/{x}.fusion_predictions.final","fusioncatcher/fusioninspector/{x}/{x}_fusionInspector.input"
params: rname='fusioninsp',sample="{x}",ref=config['project']['annotation'],starlib=config['references'][pfamily]['STARFUSIONLIB']
threads: 8
shell: "module load fusioninspector/1.1.0; module load python/2.7; mkdir -p fusioncatcher/fusioninspector/{params.sample}; perl Scripts/make_fusioninspector_input.pl {params.sample} {params.ref}; FusionInspector --fusions fusioncatcher/fusioninspector/{params.sample}/{params.sample}_fusionInspector.input --genome_lib {params.starlib} --left_fq {input.file1} --right_fq {input.file2} --out_dir fusioncatcher/fusioninspector/{params.sample} --out_prefix {params.sample} --prep_for_IGV"
shell: "module load fusioninspector/1.1.0; module load STAR/2.7.0f; mkdir -p fusioncatcher/fusioninspector/{params.sample}; perl Scripts/make_fusioninspector_input.pl {params.sample} {params.ref}; FusionInspector --fusions fusioncatcher/fusioninspector/{params.sample}/{params.sample}_fusionInspector.input --genome_lib {params.starlib} --left_fq {input.file1} --right_fq {input.file2} --out_dir fusioncatcher/fusioninspector/{params.sample} --out_prefix {params.sample} --prep_for_IGV"
2 changes: 1 addition & 1 deletion Rules/fusioninsp_starfus.rl
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ rule rule fusioninsp_starfus:
input: fusions="starfusion/{x}/star-fusion.fusion_predictions.tsv",file1="{x}.R1.trimmed.fastq.gz",file2="{x}.R2.trimmed.fastq.gz"
output: "starfusion/fusioninspector/{x}/{x}.fusion_predictions.final"
params: rname='fusioninsp',sample="{x}",starlib=config['references'][pfamily]['STARFUSIONLIB']
shell: "module load fusioninspector/1.1.0; module load python/2.7; mkdir -p starfusion/fusioninspector/{params.sample}; FusionInspector --fusions {input.fusions} --genome_lib {params.starlib} --left_fq {input.file1} --right_fq {input.file2} --out_dir starfusion/fusioninspector/{params.sample} --out_prefix {params.sample} --prep_for_IGV"
shell: "module load fusioninspector/1.1.0; module load STAR/2.7.0f; mkdir -p starfusion/fusioninspector/{params.sample}; FusionInspector --fusions {input.fusions} --genome_lib {params.starlib} --left_fq {input.file1} --right_fq {input.file2} --out_dir starfusion/fusioninspector/{params.sample} --out_prefix {params.sample} --prep_for_IGV"
16 changes: 11 additions & 5 deletions Rules/gatk_select_variants.rl
Original file line number Diff line number Diff line change
@@ -1,9 +1,15 @@
rule gatk_select_variants:
input: bam=lambda wildcards: config['project']['units'][wildcards.x]+".recal.bam",
vcf="exome.recode.vcf"
output: vcf="sample_vcfs/{x}.sample.vcf",
vcf="exome.relaxedFilter.vcf",
output: vcfgz="sample_vcfs/{x}.sample.vcf.gz",
csvstats="sample_vcfs/{x}.stats.csv",
htmlstats="sample_vcfs/{x}.stats.html",
bed="sample_vcfs/{x}.snpeff.bed"
params: sample=lambda wildcards: config['project']['units'][wildcards.x],gatk=config['bin'][pfamily]['GATK'],genome=config['references'][pfamily]['GENOME'],targets="exome_targets.bed",snpeff=config['bin'][pfamily]['SNPEFF'],effgenome=config['references'][pfamily]['SNPEFF_GENOME'],effconfig=config['references'][pfamily]['SNPEFF_CONFIG'],rname="pl:extract"
shell: "{params.gatk} -T SelectVariants -R {params.genome} -V {input.vcf} -sn {params.sample} -env -o {output.vcf}; module load snpEff/4.3t; java -Xmx12g -jar $SNPEFF_JAR -v -c {params.effconfig} -o bed -csvStats {output.csvstats} -stats {output.htmlstats} {params.effgenome} {output.vcf} > {output.bed}"
bed="sample_vcfs/{x}.snpeff.bed",
stats="sample_vcfs/{x}.bcftools"
params: sample=lambda wildcards: config['project']['units'][wildcards.x],gatk=config['bin'][pfamily]['GATK'],genome=config['references'][pfamily]['GENOME'],targets="exome_targets.bed",snpeff=config['bin'][pfamily]['SNPEFF'],effgenome=config['references'][pfamily]['SNPEFF_GENOME'],effconfig=config['references'][pfamily]['SNPEFF_CONFIG'],exons=config['references'][pfamily]['EXONS'],rname="pl:extract"
shell: """{params.gatk} -T SelectVariants -R {params.genome} -V {input.vcf} -sn {params.sample} -env -o sample_vcfs/{params.sample}.sample.vcf
module load snpEff/4.3t; java -Xmx12g -jar $SNPEFF_JAR -v -c {params.effconfig} -o bed -csvStats {output.csvstats} -stats {output.htmlstats} {params.effgenome} sample_vcfs/{params.sample}.sample.vcf > {output.bed}
module load samtools
bgzip sample_vcfs/{params.sample}.sample.vcf
tabix -p vcf {output.vcfgz}
bcftools stats --exons {params.exons} --fasta-ref {params.genome} --regions-file {params.targets} {output.vcfgz} > {output.stats}"""
Loading

0 comments on commit eb80395

Please sign in to comment.