Skip to content

Commit

Permalink
Fixes #103, add resources fields in the settings file for all rules, …
Browse files Browse the repository at this point in the history
…decrease default required memory
  • Loading branch information
borauyar committed Apr 1, 2022
1 parent 1677c75 commit 829097a
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 12 deletions.
30 changes: 26 additions & 4 deletions etc/settings.yaml.in
Original file line number Diff line number Diff line change
Expand Up @@ -81,25 +81,44 @@ execution:
rules:
__default__:
threads: 1
memory: 8000
memory: 2000
translate_sample_sheet_for_report:
threads: 1
memory: 500
trim_qc_reads:
threads: 1
memory: 4000
star_index:
threads: 2
memory: 32000
hisat2-build:
threads: 2
memory: 16000
threads: 2
memory: 32000
salmon_index:
threads: 8
memory: 5000
salmon_quant:
threads: 8
memory: 6000
counts_from_salmon:
threads: 1
memory: 200
collate_read_counts:
threads: 1
memory: 200
norm_counts_deseq:
threads: 1
memory: 1000
star_map:
threads: 2
memory: 16000
hisat2:
threads: 2
memory: 8000
index_bam:
memory: 500
multiqc:
memory: 500
coverage_bamCoverage:
threads: 1
memory: 4000
Expand All @@ -111,7 +130,10 @@ execution:
memory: 6000
check_annotation_files:
threads: 1
memory: 16000
memory: 16000
reports:
threads: 1
memory: 4000

tools:
gunzip:
Expand Down
20 changes: 12 additions & 8 deletions snakefile.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,8 @@ def trim_reads_input(args):
r2=os.path.join(TRIMMED_READS_DIR, "{sample}.trimmed.R2.fq.gz"),
html=os.path.join(QC_DIR, "{sample}.pe.fastp.html"),
json=os.path.join(QC_DIR, "{sample}.pe.fastp.json") #notice that multiqc recognizes files ending with fastp.json
resources:
mem_mb = config['execution']['rules']['trim_qc_reads']['memory']
log: os.path.join(LOG_DIR, 'trim_reads.{sample}.log')
shell: "{FASTP_EXEC} --in1 {input[0]} --in2 {input[1]} --out1 {output.r1} --out2 {output.r2} -h {output.html} -j {output.json} >> {log} 2>&1"

Expand All @@ -307,6 +309,8 @@ def trim_reads_input(args):
r = os.path.join(TRIMMED_READS_DIR, "{sample}.trimmed.fq.gz"),
html=os.path.join(QC_DIR, "{sample}.se.fastp.html"),
json=os.path.join(QC_DIR, "{sample}.se.fastp.json") #notice that multiqc recognizes files ending with fastp.json
resources:
mem_mb = config['execution']['rules']['trim_qc_reads']['memory']
log: os.path.join(LOG_DIR, 'trim_reads.{sample}.log')
shell: "{FASTP_EXEC} --in1 {input[0]} --out1 {output.r} -h {output.html} -j {output.json} >> {log} 2>&1 "

Expand Down Expand Up @@ -395,7 +399,7 @@ def hisat2_file_arguments(args):
input: os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam')
output: os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam.bai')
resources:
mem_mb = 100
mem_mb = config['execution']['rules']['index_bam']['memory']
log: os.path.join(LOG_DIR, 'samtools_index_{sample}.log')
shell: "{SAMTOOLS_EXEC} index {input} {output} >> {log} 2>&1"

Expand Down Expand Up @@ -446,7 +450,7 @@ def hisat2_file_arguments(args):
os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.transcripts.tsv"),
os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.genes.tsv")
resources:
mem_mb = 1000
mem_mb = config['execution']['rules']['counts_from_salmon']['memory']
log: os.path.join(LOG_DIR, "salmon", 'salmon_import_counts.log')
shell: "{RSCRIPT_EXEC} {SCRIPTS_DIR}/counts_matrix_from_SALMON.R {SALMON_DIR} {COUNTS_DIR} {input.colDataFile} >> {log} 2>&1"

Expand Down Expand Up @@ -497,7 +501,7 @@ def hisat2_file_arguments(args):
mapping_output=expand(os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam'), sample=SAMPLES)
output: os.path.join(MULTIQC_DIR, 'multiqc_report.html')
resources:
mem_mb = 200
mem_mb = config['execution']['rules']['multiqc']['memory']
log: os.path.join(LOG_DIR, f'multiqc.{MAPPER}.log')
shell: "{MULTIQC_EXEC} -f -o {MULTIQC_DIR} {OUTPUT_DIR} >> {log} 2>&1"

Expand Down Expand Up @@ -530,7 +534,7 @@ def hisat2_file_arguments(args):
output:
os.path.join(COUNTS_DIR, "raw_counts", MAPPER, "counts.tsv")
resources:
mem_mb = 200
mem_mb = config['execution']['rules']['collate_read_counts']['memory']
log: os.path.join(LOG_DIR, MAPPER, "collate_read_counts.log")
params:
mapped_dir = os.path.join(MAPPED_READS_DIR, MAPPER),
Expand All @@ -549,7 +553,7 @@ def hisat2_file_arguments(args):
size_factors = os.path.join(COUNTS_DIR, "normalized", MAPPER, "deseq_size_factors.txt"),
norm_counts = os.path.join(COUNTS_DIR, "normalized", MAPPER, "deseq_normalized_counts.tsv")
resources:
mem_mb = 1000
mem_mb = config['execution']['rules']['norm_counts_deseq']['memory']
log:
os.path.join(LOG_DIR, MAPPER, "norm_counts_deseq.log")
params:
Expand All @@ -576,7 +580,7 @@ def hisat2_file_arguments(args):
output:
os.path.join(OUTPUT_DIR, "report", MAPPER, '{analysis}.deseq.report.html')
resources:
mem_mb = 4000
mem_mb = config['execution']['rules']['reports']['memory']
shell:
"{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1"

Expand All @@ -598,7 +602,7 @@ def hisat2_file_arguments(args):
output:
os.path.join(OUTPUT_DIR, "report", 'salmon', '{analysis}.salmon.transcripts.deseq.report.html')
resources:
mem_mb = 4000
mem_mb = config['execution']['rules']['reports']['memory']
shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.transcripts' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1"

rule report3:
Expand All @@ -619,5 +623,5 @@ def hisat2_file_arguments(args):
output:
os.path.join(OUTPUT_DIR, "report", "salmon", '{analysis}.salmon.genes.deseq.report.html')
resources:
mem_mb = 4000
mem_mb = config['execution']['rules']['reports']['memory']
shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.genes' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1"

0 comments on commit 829097a

Please sign in to comment.