From b88e91c9998afd192dd513b7e443c6fd2257b711 Mon Sep 17 00:00:00 2001 From: gregdenay Date: Fri, 8 Apr 2022 14:59:35 +0200 Subject: [PATCH 1/6] changelog --- changelog.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/changelog.md b/changelog.md index 219f838..f80b5ae 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,7 @@ ### Dev +### 1.4.4 + #### Improvements * Now unpacks trimmed read files on a sample wise fashion prior to Dada2 denoising instead of unpacking all samples at once. This should reduce the memory use during the analysis. * Preventively fixed a pandas CopyWarning (#31) and FutureWarning From 1cd07cf9ff2944de2ee6b746bb8bf4ffecf1eb5e Mon Sep 17 00:00:00 2001 From: gregdenay Date: Mon, 11 Apr 2022 14:42:11 +0200 Subject: [PATCH 2/6] Better error handling and produces stats until error for dada steps --- workflow/rules/dada2.smk | 4 +- workflow/scripts/dada.R | 120 ++++++++++++++++++++++++++++++--------- 2 files changed, 94 insertions(+), 30 deletions(-) diff --git a/workflow/rules/dada2.smk b/workflow/rules/dada2.smk index d672b68..35daeca 100644 --- a/workflow/rules/dada2.smk +++ b/workflow/rules/dada2.smk @@ -30,8 +30,8 @@ rule denoise: r1="{sample}/trimmed/{sample}_R1.fastq", r2="{sample}/trimmed/{sample}_R2.fastq", output: - r1_filt=temp("{sample}/denoising/{sample}_R1_filtered.fasta"), - r2_filt=temp("{sample}/denoising/{sample}_R2_filtered.fasta"), + r1_filt=temp("{sample}/denoising/{sample}_R1_filtered.fasta.gz"), + r2_filt=temp("{sample}/denoising/{sample}_R2_filtered.fasta.gz"), errplotF="{sample}/denoising/{sample}_R1_errorRates.pdf", errplotR="{sample}/denoising/{sample}_R2_errorRates.pdf", denoiseR1="{sample}/denoising/{sample}_R1_denoising.txt", diff --git a/workflow/scripts/dada.R b/workflow/scripts/dada.R index 3de53d6..1338741 100644 --- a/workflow/scripts/dada.R +++ b/workflow/scripts/dada.R @@ -1,14 +1,44 @@ #!/usr/bin/env Rscript + +# logging +log = file(snakemake@log[[1]], open="wt") +sink(log) +sink(log, type = 
"message") + +#DEBUGG +# log = file("logfile.log", open="wt") +# sink(log) +# sink(log, type = "message") + +# threads <- 6 +# max_EE <- 0 +# sample.names <- "test" +# filtFs <- "R1.fa" +# filtRs <- "R2.fa" + +# fnFs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/NTC/trimmed/NTC_R1.fastq" +# fnRs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/NTC/trimmed/NTC_R2.fastq" + +# fnFs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/NTC/trimmed/NTC_R1x.fastq" +# fnRs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/NTC/trimmed/NTC_R2x.fastq" + +# fnFs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/Lasagne_horse/trimmed/Lasagne_horse_R1.fastq" +# fnRs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/Lasagne_horse/trimmed/Lasagne_horse_R2.fastq" +## + + +# Imports library(ggplot2, quiet=T) library(dada2, quiet=T) -# Get parameters from snakemake --------------------------------------------------------------------------------------------- -# Input + +# Get parameters from snakemake +# Inputs fnFs <- snakemake@input[["r1"]] fnRs <- snakemake@input[["r2"]] -# Output +# Outputs filtFs <- snakemake@output[["r1_filt"]] filtRs <- snakemake@output[["r2_filt"]] errplotF <- snakemake@output[["errplotF"]] @@ -31,25 +61,34 @@ maxlength <- snakemake@params[["max_length"]] max_mismatch <- snakemake@params[["max_mismatch"]] chimera <- snakemake@params[["chimera"]] -# logging -log = file(snakemake@log[[1]], open="wt") -sink(log) -sink(log, type = "message") + +getN <- function(x) { + tryCatch(sum(getUniques(x)), + error=function(e){return(0)}) +} + # DADA Workflow ------------------------------------------------------------------------------------------------------------- -# Is there a more elegant solution here to handle no results cases? 
-# can't find a way to properly handle exceptions in R1 +# Filter reads +names(filtFs) <- sample.names +names(filtRs) <- sample.names + +out <- suppressWarnings( + withCallingHandlers( + expr=filterAndTrim(fnFs, filtFs, fnRs, filtRs, + maxN=0, maxEE=max_EE, rm.phix=TRUE, + compress=TRUE, multithread=threads, verbose=TRUE), + error=function(e) {message(e$message)}, + warning=function(w) {message(w$message) + writeLines("", filtRs) + writeLines("", filtFs) + } + )) + +# Learn Error rate tryCatch({ - # Filter reads - names(filtFs) <- sample.names - names(filtRs) <- sample.names - out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs, - maxN=0, maxEE=max_EE, rm.phix=TRUE, - compress=TRUE, multithread=threads, verbose=TRUE) - - # Learn Error rate errF <- learnErrors(filtFs, multithread=threads, verbose=TRUE) errR <- learnErrors(filtRs, multithread=threads, verbose=TRUE) @@ -112,7 +151,6 @@ tryCatch({ writeLines(asfasta, chimeras_fasta) # Report - getN <- function(x) sum(getUniques(x)) track <- cbind(sample.names, out, round((out[1]-out[2])/out[1]*100, 2), @@ -147,15 +185,10 @@ tryCatch({ "Reads in ASVs [%]") write.table(track, report, quote = FALSE, sep = "\t", row.names = FALSE) -}, error = function(c) { +}, error = function(e) { + warning("Terminated process on error.") + warning("Empty output files generated") - # Write empty files - if (!exists(filtRs)){ - writeLines("", filtRs) - } - if (!exists(filtFs)){ - writeLines("", filtFs) - } if (!exists(errplotF)){ ggplot() + theme_void() + @@ -186,8 +219,39 @@ tryCatch({ writeLines("", asv_table) } if (!exists(report)){ - writeLines("Sample Total reads Filtered reads Discarded reads [%] Denoised R1 Denoised R2 Merged Merging failures [%] ASV Size-filtered ASV Discarded ASVs [% ASV] Discarded ASVs [% of reads] Non-chimeric ASV Chimeras [% ASV] Chimeras [% of reads] Reads in ASVs Reads in ASVs [%]", - report) + track <- cbind(sample.names, + out, + round((out[1]-out[2])/out[1]*100, 2), + getN(dadaFs), + getN(dadaRs), + 
getN(mergers), + round((1-getN(mergers)/out[2])*100, 2), + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0, + 0) + colnames(track) <- c("Sample", + "Total reads", "Filtered reads", + "Discarded reads [%]", + "Denoised R1", + "Denoised R2", + "Merged", + "Merging failures [%]", + "ASV", + "Size-filtered ASV", + "Discarded ASVs [% ASV]", + "Discarded ASVs [% of reads]", + "Non-chimeric ASV", + "Chimeras [% ASV]", + "Chimeras [% of reads]", + "Reads in ASVs", + "Reads in ASVs [%]") + write.table(track, report, quote = FALSE, sep = "\t", row.names = FALSE) } if (!exists(chimeras_fasta)){ writeLines("", chimeras_fasta) From 7874488beee3066a5b3b966ebbcc692cd730361e Mon Sep 17 00:00:00 2001 From: gregdenay Date: Mon, 11 Apr 2022 14:51:54 +0200 Subject: [PATCH 3/6] Fixed newlines in report --- workflow/scripts/dada.R | 1 + workflow/scripts/write_report.Rmd | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/workflow/scripts/dada.R b/workflow/scripts/dada.R index 1338741..a65696f 100644 --- a/workflow/scripts/dada.R +++ b/workflow/scripts/dada.R @@ -186,6 +186,7 @@ tryCatch({ write.table(track, report, quote = FALSE, sep = "\t", row.names = FALSE) }, error = function(e) { + warning(paste0("Error message: ", e$message)) warning("Terminated process on error.") warning("Empty output files generated") diff --git a/workflow/scripts/write_report.Rmd b/workflow/scripts/write_report.Rmd index e61981c..730791d 100644 --- a/workflow/scripts/write_report.Rmd +++ b/workflow/scripts/write_report.Rmd @@ -332,5 +332,9 @@ knitr::kable(soft_table) ## Help [Link to the repository](https://github.com/CVUA-RRW/FooDMe) + + [Report a bug or ask a question](https://github.com/CVUA-RRW/FooDMe/issues/new) + + [Last release](https://github.com/CVUA-RRW/FooDMe/releases) From 3f3b08c58c67a471d9ee1d6d5e034659565de012 Mon Sep 17 00:00:00 2001 From: gregdenay Date: Mon, 11 Apr 2022 14:52:24 +0200 Subject: [PATCH 4/6] Fixed newlines in report --- workflow/Snakefile | 2 +- 1 file changed, 1 insertion(+), 1 
deletion(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 02cfbfb..a418986 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -49,7 +49,7 @@ onstart: print(f"\nYou are using FooDMe version: {version}") with open(pipe_log, "a") as f: f.write(f"[{get_local_time()}]: Pipeline started\n") - + raise() onsuccess: try: From 9f7643c6e26ed9b52aae8f7f953b73bc820f569b Mon Sep 17 00:00:00 2001 From: gregdenay Date: Mon, 11 Apr 2022 15:04:14 +0200 Subject: [PATCH 5/6] Fixed on_error script when cwd is not snakemake's wd --- workflow/Snakefile | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index a418986..8b63cf5 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -49,7 +49,7 @@ onstart: print(f"\nYou are using FooDMe version: {version}") with open(pipe_log, "a") as f: f.write(f"[{get_local_time()}]: Pipeline started\n") - raise() + onsuccess: try: @@ -67,12 +67,18 @@ onsuccess: onerror: - for logfile in os.listdir(".snakemake/log/"): - shutil.move(os.path.join(".snakemake/log", logfile), "logs") - shutil.rmtree(".snakemake", ignore_errors=True) - print( - f"\nAn error occured, please consult the latest log file in" - f"{os.path.join(config['workdir'], '.snakemake', 'log')}" - ) + try: + for logfile in os.listdir(".snakemake/log/"): + shutil.move(os.path.join(".snakemake/log", logfile), "logs") + shutil.rmtree(".snakemake", ignore_errors=True) + print( + f"\nAn error occurred, please consult the latest log file in " + f"{os.path.join(config['workdir'], '.snakemake', 'log')}" + ) + except: + print( + f"\nAn error occurred, please consult the snakemake log file in " + f"the current working directory." 
+ ) with open(pipe_log, "a") as f: f.write(f"[{get_local_time()}]: Pipeline stopped on error\n") From 90ec773987eaab714f7691d91565c52af23389c1 Mon Sep 17 00:00:00 2001 From: gregdenay Date: Mon, 11 Apr 2022 15:09:06 +0200 Subject: [PATCH 6/6] changelog --- VERSION | 2 +- changelog.md | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 1c99cf0..efe0205 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.4.4 +1.4.4dev diff --git a/changelog.md b/changelog.md index f80b5ae..6d9a22d 100644 --- a/changelog.md +++ b/changelog.md @@ -1,5 +1,8 @@ ### Dev +#### Improvements +* Improved error handling and logging for the DADA2 steps. Will now correctly output number of reads and denoising/merging results for failing samples. + ### 1.4.4 #### Improvements