From a8e9aa83c088456309e45b523902433e01847930 Mon Sep 17 00:00:00 2001
From: jlac
Date: Wed, 30 Jan 2019 09:30:26 -0500
Subject: [PATCH] moved sort out of STAR and into samtools rule to deal with memory bug

---
Review notes (kept below the "---" marker, so they are not part of the
commit message):

* This copy of the patch had its line breaks and indentation flattened. The
  layout here is reconstructed from the hunk headers; every token is
  unchanged, but leading whitespace on the .rl/.json lines is a best guess.
  If a hunk is rejected, retry with `git apply --ignore-whitespace`, and
  check the indentation of the added lines against the target files.
* samtools_sort declares params.novosort, yet its shell command only
  interpolates {threads}, {output} and {input}. The param looks like a
  copy-paste leftover and can probably be dropped in a follow-up.
* cluster.json requests lscratch for samtools_sort, but the command passes
  no -T option to `samtools sort` -- TODO confirm where its temporary files
  are meant to go.

 Rules/samtools_sort.rl | 8 ++++++++
 Rules/star.align.2.rl  | 4 ++--
 cluster.json           | 5 +++++
 rules.json             | 1 +
 4 files changed, 16 insertions(+), 2 deletions(-)
 create mode 100644 Rules/samtools_sort.rl

diff --git a/Rules/samtools_sort.rl b/Rules/samtools_sort.rl
new file mode 100644
index 0000000..a4471da
--- /dev/null
+++ b/Rules/samtools_sort.rl
@@ -0,0 +1,8 @@
+rule samtools_sort:
+    input: "{x}.p2Aligned.out.bam",
+    output: temp("{x}.p2Aligned.sortedByCoord.out.bam")
+    params: novosort=config['bin'][pfamily]['NOVOSORT'],rname="pl:sort"
+    threads: 8
+    shell: "module load samtools/1.9; samtools sort -@ {threads} -o {output} {input};"
+
+
diff --git a/Rules/star.align.2.rl b/Rules/star.align.2.rl
index ac0d542..1324db8 100755
--- a/Rules/star.align.2.rl
+++ b/Rules/star.align.2.rl
@@ -1,6 +1,6 @@
 rule star_align_2:
     input: file1="{x}.R1.trimmed.fastq.gz",file2="{x}.R2.trimmed.fastq.gz",length="QC/{x}_readlength.txt",tab=expand("{x}SJ.out.tab",x=samples)
-    output: out1=temp("{x}.p2Aligned.sortedByCoord.out.bam"),out4="{x}.p2SJ.out.tab",out5="{x}.p2Log.final.out"
+    output: out1=temp("{x}.p2Aligned.out.bam"),out4="{x}.p2SJ.out.tab",out5="{x}.p2Log.final.out"
     params: rname='pl:star2p',prefix="{x}.p2",outsamunmapped=config['bin'][pfamily]['OUTSAMUNMAPPED'],wigtype=config['bin'][pfamily]['WIGTYPE'],wigstrand=config['bin'][pfamily]['WIGSTRAND'], gtffile=config['references'][pfamily]['FUSIONGTFFILE'], nbjuncs=config['bin'][pfamily]['NBJUNCS'],starref=config['references'][pfamily]['STARREF']
     threads: 32
     run:
@@ -8,5 +8,5 @@ rule star_align_2:
         rl=int(open(input.length).readlines()[0].strip())-1
         dbrl=sorted(list(map(lambda x:int(re.findall("genes-(\d+)",x)[0]),glob.glob(params.starref+'*/',recursive=False))))
         bestdbrl=next(x[1] for x in enumerate(dbrl) if x[1] >= rl)
-        cmd="module load STAR/2.5.2b; STAR --genomeDir {params.starref}"+str(bestdbrl)+" --readFilesIn {input.file1} {input.file2} --readFilesCommand zcat --runThreadN {threads} --outFileNamePrefix {params.prefix} --outSAMunmapped {params.outsamunmapped} --sjdbFileChrStartEnd {input.tab} --sjdbGTFfile {params.gtffile} --limitSjdbInsertNsj 10000000 --outSAMtype BAM SortedByCoordinate --limitBAMsortRAM 39627002904"
+        cmd="module load STAR/2.5.2b; STAR --genomeDir {params.starref}"+str(bestdbrl)+" --readFilesIn {input.file1} {input.file2} --readFilesCommand zcat --runThreadN {threads} --outFileNamePrefix {params.prefix} --outSAMunmapped {params.outsamunmapped} --sjdbFileChrStartEnd {input.tab} --sjdbGTFfile {params.gtffile} --limitSjdbInsertNsj 10000000 --outSAMtype BAM Unsorted"
         shell(cmd)
\ No newline at end of file
diff --git a/cluster.json b/cluster.json
index ebbfa13..8947381 100755
--- a/cluster.json
+++ b/cluster.json
@@ -24,6 +24,11 @@
     "gres": "lscratch:256",
     "threads": "2"
   },
+  "samtools_sort": {
+    "mem": "48g",
+    "gres": "lscratch:256",
+    "threads": "8"
+  },
   "picard_dedup": {
     "mem": "96g",
     "gres": "lscratch:256",
diff --git a/rules.json b/rules.json
index 11357ad..c40dcba 100755
--- a/rules.json
+++ b/rules.json
@@ -7,6 +7,7 @@
     "freec_wgs_tumoronly": ["wgs-somatic-tumoronly"],
     "sequenza": ["wgs-somatic","exomeseq-somatic"],
     "avia": ["wgslow","exomeseq-germline","exomeseq-germline-recal","exomeseq-germline-partial","rnaseqvargerm"],
+    "samtools_sort": ["rnaseqvargerm"],
     "index_bams": ["wgslow","exomeseq-germline","wgs-somatic-tumoronly","exomeseq-somatic-tumoronly","wgs-somatic","exomeseq-somatic"],
     "rnaseqforfusions": ["rnaseqfusion"],
     "rnafusioncleanup": ["none"],