From 36662a3799d4d3e2cbb76a84eaf412b891e20111 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Mon, 26 Feb 2024 14:19:20 +0100 Subject: [PATCH 1/4] Add helper script to clean up test files --- test/helper.sh | 20 ++++++++++++++++++++ test/test_fusion.yml | 4 +--- test/test_hamlet.yml | 18 +++--------------- test/test_snv_indels.yml | 6 +----- 4 files changed, 25 insertions(+), 23 deletions(-) create mode 100644 test/helper.sh diff --git a/test/helper.sh b/test/helper.sh new file mode 100644 index 0000000..bdc0f56 --- /dev/null +++ b/test/helper.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +set -euxo pipefail + +ref=test/data/reference/hamlet-ref.fa +star=test/data/reference/hamlet-star/Genome + +function setup { + unxz -k ${ref}.xz + unxz -k ${star}.xz +} + +function cleanup { + rm -f ${ref} + rm -f ${star} +} + +trap cleanup EXIT + +setup diff --git a/test/test_fusion.yml b/test/test_fusion.yml index 8045978..1dd0835 100644 --- a/test/test_fusion.yml +++ b/test/test_fusion.yml @@ -48,7 +48,7 @@ - fusion command: > bash -c " - unxz -k test/data/reference/hamlet-ref.fa.xz; + source test/helper.sh snakemake -rp --snakefile includes/fusion/Snakefile @@ -60,8 +60,6 @@ --use-singularity --singularity-args '--cleanenv --bind /tmp' --singularity-prefix '~/.singularity/cache/snakemake' - - rm test/data/reference/hamlet-ref.fa; " files: # Should create the arriba output files diff --git a/test/test_hamlet.yml b/test/test_hamlet.yml index 707d16d..b451bdb 100644 --- a/test/test_hamlet.yml +++ b/test/test_hamlet.yml @@ -58,10 +58,7 @@ - hamlet command: > bash -c " - set -e - - unxz -k test/data/reference/hamlet-ref.fa.xz; - unxz -k test/data/reference/hamlet-star/Genome.xz; + source test/helper.sh snakemake -rp \ --snakefile Snakefile \ @@ -72,9 +69,6 @@ --use-singularity \ --singularity-args '--cleanenv --bind /tmp' \ --singularity-prefix '~/.singularity/cache/snakemake' - - rm test/data/reference/hamlet-ref.fa; - rm test/data/reference/hamlet-star/Genome; " - path: MO1-RNAseq-1-16714/hamlet_report.SRR8615409.pdf - path: "log/generate_report.MO1-RNAseq-1-16714.txt" @@ -86,16 +80,13 @@ - hamlet command: > bash -c " - set -e - - unxz -k test/data/reference/hamlet-ref.fa.xz; - unxz -k test/data/reference/hamlet-star/Genome.xz; + source test/helper.sh snakemake \ --snakefile Snakefile \ --configfile test/data/config/hamlet.json \ --config pepfile=test/pep/chrM_itd.csv \ - --cores \ + --cores 1 \ --verbose \ --use-singularity \ --singularity-args '--cleanenv --bind /tmp' \ @@ -103,9 +94,6 @@ SRR8615409/hamlet_report.SRR8615409.html \ SRR8615409/hamlet_report.SRR8615409.pdf - rm test/data/reference/hamlet-ref.fa; - rm test/data/reference/hamlet-star/Genome; - # Test if the table scripts can still parse the summary.json file python3 utilities/hamlet_table.py variant SRR8615409/SRR8615409.summary.json > variant.tsv; diff --git a/test/test_snv_indels.yml b/test/test_snv_indels.yml index 8b71465..bb94651 100644 --- a/test/test_snv_indels.yml +++ b/test/test_snv_indels.yml @@ -46,8 +46,7 @@ - snv-indels command: > bash -c " - unxz -k test/data/reference/hamlet-ref.fa.xz; - unxz -k test/data/reference/hamlet-star/Genome.xz; + source test/helper.sh snakemake -rp \ --snakefile includes/snv-indels/Snakefile \ @@ -59,9 +58,6 @@ --use-singularity \ --singularity-args '--cleanenv --bind /tmp' \ --singularity-prefix '~/.singularity/cache/snakemake' - - rm test/data/reference/hamlet-ref.fa; - rm test/data/reference/hamlet-star/Genome; " stderr: # snv-indel should not use a local VEP database, but query ensembl From e0a8b4550b5de9387c901ebe1717b12bbf57ec57 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Mon, 26 Feb 2024 15:36:17 +0100 Subject: [PATCH 2/4] Refactor FastQ rules into single rule --- includes/qc-seq/Snakefile | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) diff --git a/includes/qc-seq/Snakefile b/includes/qc-seq/Snakefile index dd624ae..43918a4 100644 --- a/includes/qc-seq/Snakefile +++ b/includes/qc-seq/Snakefile @@ -102,39 +102,15 @@ rule cutadapt: """ -rule fastqc_processed: - """Runs FastQC for each pair of QC-ed inputs.""" +use rule fastqc_raw as fastqc_processed with: input: fq1="{sample}/qc-seq/{read_group}/{sample}-{read_group}-R1.fq.gz", fq2="{sample}/qc-seq/{read_group}/{sample}-{read_group}-R2.fq.gz", tmp="tmp", output: folder=directory("{sample}/qc-seq/{read_group}/fastqc-{pair}-processed"), - params: - xms="4096M", - xmx="4096M", - fastqc_dir="usr/local/opt/fastqc-0.11.9", log: "log/fastqc_processed.{sample}.{read_group}.{pair}.txt", - threads: 4 - container: - containers["fastqc"] - shell: - """ - mkdir -p {output.folder} - - FASTQC_DIR=/{params.fastqc_dir} - export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" - - java -Djava.awt.headless=true -Xms{params.xms} -Xmx{params.xmx} \ - -Dfastqc.output_dir={output.folder} \ - -Dfastqc.io.tmpdir={input.tmp} \ - -Dfastqc.unzip=true \ - -Dfastqc.nogroup=true \ - -Dfastqc.threads={threads} \ - uk.ac.babraham.FastQC.FastQCApplication \ - {input.fq1:q} {input.fq2:q} 2> {log} - """ rule rg_stats: From 6ff2513f45442304d83797ab75d283708b86aa3e Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Mon, 26 Feb 2024 16:06:29 +0100 Subject: [PATCH 3/4] Update FastQC to 0.12.1 --- CHANGELOG.rst | 6 ++++-- includes/qc-seq/Snakefile | 21 ++++++++------------- includes/qc-seq/common.smk | 2 +- test/test_qc_seq.yml | 6 +++--- 4 files changed, 16 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index b3342d5..89619ee 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -8,10 +8,12 @@ Changelog .. that users understand how the changes affect the new version. ********** -v2.0.5-dev +v2.0.6-dev ********** +* Update FastQC to 0.12.1 * Modified PDF formatting - -Sort the genes of interest alphabetically +* Sort the genes of interest alphabetically + ********** v2.0.5 ********** diff --git a/includes/qc-seq/Snakefile b/includes/qc-seq/Snakefile index 43918a4..3e54690 100644 --- a/includes/qc-seq/Snakefile +++ b/includes/qc-seq/Snakefile @@ -45,9 +45,7 @@ rule fastqc_raw: output: folder=directory("{sample}/qc-seq/{read_group}/fastqc-{pair}-raw"), params: - xms="4096M", - xmx="4096M", - fastqc_dir="usr/local/opt/fastqc-0.11.9", + memory=4096, log: "log/fastqc_raw.{sample}.{read_group}.{pair}.txt", threads: 4 @@ -57,16 +55,13 @@ rule fastqc_raw: """ mkdir -p {output.folder} - FASTQC_DIR=/{params.fastqc_dir} - export CLASSPATH="$FASTQC_DIR:$FASTQC_DIR/sam-1.103.jar:$FASTQC_DIR/jbzip2-0.9.jar:$FASTQC_DIR/cisd-jhdf5.jar" - - java -Djava.awt.headless=true -Xms{params.xms} -Xmx{params.xmx} \ - -Dfastqc.output_dir={output.folder} \ - -Dfastqc.io.tmpdir={input.tmp} \ - -Dfastqc.unzip=true \ - -Dfastqc.nogroup=true \ - -Dfastqc.threads={threads} \ - uk.ac.babraham.FastQC.FastQCApplication \ + fastqc \ + --outdir {output.folder} \ + --dir {input.tmp} \ + --extract \ + --nogroup \ + --threads {threads} \ + --memory {params.memory} \ {input.fq1:q} {input.fq2:q} 2> {log} """ diff --git a/includes/qc-seq/common.smk b/includes/qc-seq/common.smk index 6a21c7b..16bc343 100644 --- a/includes/qc-seq/common.smk +++ b/includes/qc-seq/common.smk @@ -4,7 +4,7 @@ from types import SimpleNamespace containers = { "crimson": "docker://quay.io/biocontainers/crimson:1.1.0--pyh5e36f6f_0", "cutadapt": "docker://quay.io/biocontainers/cutadapt:4.1--py310h1425a21_1", - "fastqc": "docker://quay.io/biocontainers/fastqc:0.11.9--hdfd78af_1", + "fastqc": "docker://quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0", } diff --git a/test/test_qc_seq.yml b/test/test_qc_seq.yml index 807d1a3..e3bf4ba 100644 --- a/test/test_qc_seq.yml +++ b/test/test_qc_seq.yml @@ -50,9 +50,9 @@ contains_regex: # The forward/reverse fastq files from the PEP should be handled correctly - - '-Dfastqc.output_dir=TestSample1.* test/data/fastq/R1.fq.gz test/data/fastq/R2.fq.gz' - - '-Dfastqc.output_dir=TestSample2.* test/data/fastq/R1.fq.gz test/data/fastq/R2.fq.gz' - - "-Dfastqc.output_dir=TestSample2.* 'test/data/fastq/SRR8615409 chrM_1.fastq.gz' 'test/data/fastq/SRR8615409 chrM_2.fastq.gz'" + - 'fastqc .* --outdir TestSample1.* test/data/fastq/R1.fq.gz test/data/fastq/R2.fq.gz' + - 'fastqc .* --outdir TestSample2.* test/data/fastq/R1.fq.gz test/data/fastq/R2.fq.gz' + - "fastqc .* --outdir TestSample2.* 'test/data/fastq/SRR8615409 chrM_1.fastq.gz' 'test/data/fastq/SRR8615409 chrM_2.fastq.gz'" # Singularity should be available - name: test-qc-sanity-singularity From 7246607fe5756158eee3e7133160d2dd1c655d27 Mon Sep 17 00:00:00 2001 From: Redmar van den Berg Date: Mon, 26 Feb 2024 16:12:19 +0100 Subject: [PATCH 4/4] Remove some old code --- test/test_report.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/test/test_report.py b/test/test_report.py index 6f7b1c6..7dc3768 100644 --- a/test/test_report.py +++ b/test/test_report.py @@ -32,10 +32,6 @@ def parse_table(table): # Get the rows for row in table.find("tbody").find_all("tr"): d = {k: v.get_text() for k, v in zip(headers, row.find_all("td"))} - # Convert the price field to float - # for k, v in d.items(): - # if "€" in v: - # d[k] = price_to_float(v) data.append(d) return data