From 829097a2fc66de91db17e1e2664b5aeb26f1232d Mon Sep 17 00:00:00 2001 From: borauyar Date: Fri, 1 Apr 2022 13:50:08 +0200 Subject: [PATCH] Fixes #103, add resources fields in the settings file for all rules, decrease default required memory --- etc/settings.yaml.in | 30 ++++++++++++++++++++++++++---- snakefile.py | 20 ++++++++++++-------- 2 files changed, 38 insertions(+), 12 deletions(-) diff --git a/etc/settings.yaml.in b/etc/settings.yaml.in index ca74536..44c0048 100755 --- a/etc/settings.yaml.in +++ b/etc/settings.yaml.in @@ -81,25 +81,44 @@ execution: rules: __default__: threads: 1 - memory: 8000 + memory: 2000 + translate_sample_sheet_for_report: + threads: 1 + memory: 500 + trim_qc_reads: + threads: 1 + memory: 4000 star_index: threads: 2 memory: 32000 hisat2-build: - threads: 2 - memory: 16000 + threads: 2 + memory: 32000 salmon_index: threads: 8 memory: 5000 salmon_quant: threads: 8 memory: 6000 + counts_from_salmon: + threads: 1 + memory: 200 + collate_read_counts: + threads: 1 + memory: 200 + norm_counts_deseq: + threads: 1 + memory: 1000 star_map: threads: 2 memory: 16000 hisat2: threads: 2 memory: 8000 + index_bam: + memory: 500 + multiqc: + memory: 500 coverage_bamCoverage: threads: 1 memory: 4000 @@ -111,7 +130,10 @@ execution: memory: 6000 check_annotation_files: threads: 1 - memory: 16000 + memory: 16000 + reports: + threads: 1 + memory: 4000 tools: gunzip: diff --git a/snakefile.py b/snakefile.py index 56001d5..57c3d1e 100755 --- a/snakefile.py +++ b/snakefile.py @@ -297,6 +297,8 @@ def trim_reads_input(args): r2=os.path.join(TRIMMED_READS_DIR, "{sample}.trimmed.R2.fq.gz"), html=os.path.join(QC_DIR, "{sample}.pe.fastp.html"), json=os.path.join(QC_DIR, "{sample}.pe.fastp.json") #notice that multiqc recognizes files ending with fast.json + resources: + mem_mb = config['execution']['rules']['trim_qc_reads']['memory'] log: os.path.join(LOG_DIR, 'trim_reads.{sample}.log') shell: "{FASTP_EXEC} --in1 {input[0]} --in2 {input[1]} --out1 {output.r1} --out2 {output.r2} -h {output.html} -j {output.json} >> {log} 2>&1" @@ -307,6 +309,8 @@ def trim_reads_input(args): r = os.path.join(TRIMMED_READS_DIR, "{sample}.trimmed.fq.gz"), html=os.path.join(QC_DIR, "{sample}.se.fastp.html"), json=os.path.join(QC_DIR, "{sample}.se.fastp.json") #notice that multiqc recognizes files ending with fast.json + resources: + mem_mb = config['execution']['rules']['trim_qc_reads']['memory'] log: os.path.join(LOG_DIR, 'trim_reads.{sample}.log') shell: "{FASTP_EXEC} --in1 {input[0]} --out1 {output.r} -h {output.html} -j {output.json} >> {log} 2>&1 " @@ -395,7 +399,7 @@ def hisat2_file_arguments(args): input: os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam') output: os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam.bai') resources: - mem_mb = 100 + mem_mb = config['execution']['rules']['index_bam']['memory'] log: os.path.join(LOG_DIR, 'samtools_index_{sample}.log') shell: "{SAMTOOLS_EXEC} index {input} {output} >> {log} 2>&1" @@ -446,7 +450,7 @@ def hisat2_file_arguments(args): os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.transcripts.tsv"), os.path.join(COUNTS_DIR, "normalized", "salmon", "TPM_counts_from_SALMON.genes.tsv") resources: - mem_mb = 1000 + mem_mb = config['execution']['rules']['counts_from_salmon']['memory'] log: os.path.join(LOG_DIR, "salmon", 'salmon_import_counts.log') shell: "{RSCRIPT_EXEC} {SCRIPTS_DIR}/counts_matrix_from_SALMON.R {SALMON_DIR} {COUNTS_DIR} {input.colDataFile} >> {log} 2>&1" @@ -497,7 +501,7 @@ def hisat2_file_arguments(args): mapping_output=expand(os.path.join(MAPPED_READS_DIR, MAPPER, '{sample}_Aligned.sortedByCoord.out.bam'), sample=SAMPLES) output: os.path.join(MULTIQC_DIR, 'multiqc_report.html') resources: - mem_mb = 200 + mem_mb = config['execution']['rules']['multiqc']['memory'] log: os.path.join(LOG_DIR, f'multiqc.{MAPPER}.log') shell: "{MULTIQC_EXEC} -f -o {MULTIQC_DIR} {OUTPUT_DIR} >> {log} 2>&1" @@ -530,7 +534,7 @@ def hisat2_file_arguments(args): output: os.path.join(COUNTS_DIR, "raw_counts", MAPPER, "counts.tsv") resources: - mem_mb = 200 + mem_mb = config['execution']['rules']['collate_read_counts']['memory'] log: os.path.join(LOG_DIR, MAPPER, "collate_read_counts.log") params: mapped_dir = os.path.join(MAPPED_READS_DIR, MAPPER), @@ -549,7 +553,7 @@ def hisat2_file_arguments(args): size_factors = os.path.join(COUNTS_DIR, "normalized", MAPPER, "deseq_size_factors.txt"), norm_counts = os.path.join(COUNTS_DIR, "normalized", MAPPER, "deseq_normalized_counts.tsv") resources: - mem_mb = 1000 + mem_mb = config['execution']['rules']['norm_counts_deseq']['memory'] log: os.path.join(LOG_DIR, MAPPER, "norm_counts_deseq.log") params: @@ -576,7 +580,7 @@ def hisat2_file_arguments(args): output: os.path.join(OUTPUT_DIR, "report", MAPPER, '{analysis}.deseq.report.html') resources: - mem_mb = 4000 + mem_mb = config['execution']['rules']['reports']['memory'] shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1" @@ -598,7 +602,7 @@ def hisat2_file_arguments(args): output: os.path.join(OUTPUT_DIR, "report", 'salmon', '{analysis}.salmon.transcripts.deseq.report.html') resources: - mem_mb = 4000 + mem_mb = config['execution']['rules']['reports']['memory'] shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.transcripts' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1" rule report3: @@ -619,5 +623,5 @@ def hisat2_file_arguments(args): output: os.path.join(OUTPUT_DIR, "report", "salmon", '{analysis}.salmon.genes.deseq.report.html') resources: - mem_mb = 4000 + mem_mb = config['execution']['rules']['reports']['memory'] shell: "{RSCRIPT_EXEC} {params.reportR} --logo={params.logo} --prefix='{wildcards.analysis}.salmon.genes' --reportFile={params.reportRmd} --countDataFile={input.counts} --colDataFile={input.coldata} --gtfFile={GTF_FILE} --caseSampleGroups='{params.case}' --controlSampleGroups='{params.control}' --covariates='{params.covariates}' --workdir={params.outdir} --organism='{ORGANISM}' --description='{params.description}' --selfContained='{params.selfContained}' >> {log} 2>&1"