Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove split bam step from gatk calling #107

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
@@ -1,11 +1,9 @@
import os
from datetime import date

from janis_core.tool.test_classes import TTestCase
from janis_unix.tools import UncompressArchive
from janis_bioinformatics.tools import gatk4, BioinformaticsTool
from janis_bioinformatics.data_types import FastaWithDict, BamBai, VcfTabix, Bed, Vcf
from janis_bioinformatics.tools import BioinformaticsWorkflow
from janis_bioinformatics.tools import gatk4, BioinformaticsWorkflow
from janis_bioinformatics.tools.common import SplitMultiAllele


Expand Down Expand Up @@ -48,15 +46,10 @@ def constructor(self):
self.input("reference", FastaWithDict)
self.input("snps_dbsnp", VcfTabix)

self.step(
"split_bam",
gatk4.Gatk4SplitReads_4_1_3(bam=self.bam, intervals=self.intervals),
)

self.step(
"haplotype_caller",
gatk4.Gatk4HaplotypeCaller_4_1_3(
inputRead=self.split_bam.out,
inputRead=self.bam,
intervals=self.intervals,
reference=self.reference,
dbsnp=self.snps_dbsnp,
Expand Down
@@ -1,14 +1,11 @@
import os
from datetime import date

from janis_core import String, Array, WorkflowBuilder
from janis_core import String
from janis_core.tool.test_classes import TTestCase
from janis_unix.tools import UncompressArchive
from janis_bioinformatics.tools import gatk4, BioinformaticsTool
from janis_bioinformatics.data_types import FastaWithDict, BamBai, VcfTabix, Bed, Vcf
from janis_bioinformatics.tools import BioinformaticsWorkflow
from janis_bioinformatics.tools import gatk4, BioinformaticsWorkflow
from janis_bioinformatics.tools.common import SplitMultiAllele
from janis_bioinformatics.tools.htslib import BGZipLatest, TabixLatest
from janis_bioinformatics.tools.vcftools import VcfToolsvcftoolsLatest


Expand Down Expand Up @@ -38,22 +35,12 @@ def constructor(self):
self.input("panel_of_normals", VcfTabix(optional=True))
self.input("output_bam_name", String(optional=True))

# split normal and tumor bam
self.step(
"normal_split_bam",
self.process_subpipeline(bam=self.normal_bam, intervals=self.intervals),
)
self.step(
"tumor_split_bam",
self.process_subpipeline(bam=self.tumor_bam, intervals=self.intervals),
)

# variant calling + learn read orientation model
self.step(
"mutect2",
gatk4.GatkMutect2_4_1_3(
normalBams=[self.normal_split_bam.out],
tumorBams=[self.tumor_split_bam.out],
normalBams=[self.normal_bam],
tumorBams=[self.tumor_bam],
normalSample=self.normal_name,
intervals=self.intervals,
reference=self.reference,
Expand All @@ -74,7 +61,7 @@ def constructor(self):
self.step(
"getpileupsummaries",
gatk4.Gatk4GetPileUpSummariesLatest(
bam=self.tumor_split_bam.out,
bam=self.tumor_bam,
sites=self.gnomad,
intervals=self.intervals,
),
Expand Down Expand Up @@ -119,19 +106,6 @@ def constructor(self):
self.output("out_bam", source=self.mutect2.bam)
self.output("out", source=self.filterpass.out)

@staticmethod
def process_subpipeline(**connections):
    """Build the "split_bam_subpipeline" workflow and invoke it with *connections*.

    The sub-workflow declares two inputs, ``bam`` (``BamBai``) and an
    optional ``intervals`` (``Bed``), runs a single ``split_bam`` step
    using ``gatk4.Gatk4SplitReads_4_1_3``, and exposes that step's ``out``
    as the workflow output ``out``.

    Args:
        **connections: Keyword arguments forwarded unchanged when the
            built workflow is called (callers pass ``bam=`` and
            ``intervals=``).

    Returns:
        Whatever calling the ``WorkflowBuilder`` instance with
        ``**connections`` returns; callers hand it straight to
        ``self.step(...)``.
    """
    subworkflow = WorkflowBuilder("split_bam_subpipeline")

    # Inputs must be declared before they are referenced as attributes below.
    subworkflow.input("bam", BamBai)
    subworkflow.input("intervals", Bed(optional=True))

    split_reads = gatk4.Gatk4SplitReads_4_1_3(
        bam=subworkflow.bam,
        intervals=subworkflow.intervals,
    )
    subworkflow.step("split_bam", split_reads)

    subworkflow.output("out", source=subworkflow.split_bam.out)

    return subworkflow(**connections)

def bind_metadata(self):
self.metadata.version = "4.1.3.0"
self.metadata.dateCreated = date(2019, 2, 1)
Expand Down