Skip to content

Commit

Permalink
Merge pull request #39 from CVUA-RRW/dada_error_handling
Browse files Browse the repository at this point in the history
Dada error handling
  • Loading branch information
gregdenay committed Apr 11, 2022
2 parents 806120d + 90ec773 commit 07e5d4e
Show file tree
Hide file tree
Showing 6 changed files with 118 additions and 38 deletions.
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
1.4.4
1.4.4dev
5 changes: 5 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
### Dev

#### Improvements
* Improved error handling and logging for the DADA2 steps. Will now correctly output number of reads and denoisin/merging results for failing samples.

### 1.4.4

#### Improvements
* Now unpacks trimmed read files on a sample wise fashion prior to Dada2 denoising instead of unpacking all samples at once. This should reduce the memory use during the analysis.
* Preventively fixed a pandas CopyWarning (#31) and FutureWarning
Expand Down
20 changes: 13 additions & 7 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -67,12 +67,18 @@ onsuccess:


onerror:
for logfile in os.listdir(".snakemake/log/"):
shutil.move(os.path.join(".snakemake/log", logfile), "logs")
shutil.rmtree(".snakemake", ignore_errors=True)
print(
f"\nAn error occured, please consult the latest log file in"
f"{os.path.join(config['workdir'], '.snakemake', 'log')}"
)
try:
for logfile in os.listdir(".snakemake/log/"):
shutil.move(os.path.join(".snakemake/log", logfile), "logs")
shutil.rmtree(".snakemake", ignore_errors=True)
print(
f"\nAn error occured, please consult the latest log file in"
f"{os.path.join(config['workdir'], '.snakemake', 'log')}"
)
except:
print(
f"\nAn error occured, please consult the snakemake log file in"
f"the current working directory."
)
with open(pipe_log, "a") as f:
f.write(f"[{get_local_time()}]: Pipeline stopped on error\n")
4 changes: 2 additions & 2 deletions workflow/rules/dada2.smk
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ rule denoise:
r1="{sample}/trimmed/{sample}_R1.fastq",
r2="{sample}/trimmed/{sample}_R2.fastq",
output:
r1_filt=temp("{sample}/denoising/{sample}_R1_filtered.fasta"),
r2_filt=temp("{sample}/denoising/{sample}_R2_filtered.fasta"),
r1_filt=temp("{sample}/denoising/{sample}_R1_filtered.fasta.gz"),
r2_filt=temp("{sample}/denoising/{sample}_R2_filtered.fasta.gz"),
errplotF="{sample}/denoising/{sample}_R1_errorRates.pdf",
errplotR="{sample}/denoising/{sample}_R2_errorRates.pdf",
denoiseR1="{sample}/denoising/{sample}_R1_denoising.txt",
Expand Down
121 changes: 93 additions & 28 deletions workflow/scripts/dada.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,44 @@
#!/usr/bin/env Rscript


# logging
log = file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type = "message")

#DEBUGG
# log = file("logfile.log", open="wt")
# sink(log)
# sink(log, type = "message")

# threads <- 6
# max_EE <- 0
# sample.names <- "test"
# filtFs <- "R1.fa"
# filtRs <- "R2.fa"

# fnFs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/NTC/trimmed/NTC_R1.fastq"
# fnRs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/NTC/trimmed/NTC_R2.fastq"

# fnFs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/NTC/trimmed/NTC_R1x.fastq"
# fnRs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/NTC/trimmed/NTC_R2x.fastq"

# fnFs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/Lasagne_horse/trimmed/Lasagne_horse_R1.fastq"
# fnRs <- "/home/debian/NGS/metabarcoding_paper/foodme_benchmark/FooDMe_paper/.tests/Lasagne_horse/trimmed/Lasagne_horse_R2.fastq"
##


# Imports
library(ggplot2, quiet=T)
library(dada2, quiet=T)

# Get parameters from snakemake ---------------------------------------------------------------------------------------------
# Input

# Get parameters from snakemake
# Inputs
fnFs <- snakemake@input[["r1"]]
fnRs <- snakemake@input[["r2"]]

# Output
# Outputs
filtFs <- snakemake@output[["r1_filt"]]
filtRs <- snakemake@output[["r2_filt"]]
errplotF <- snakemake@output[["errplotF"]]
Expand All @@ -31,25 +61,34 @@ maxlength <- snakemake@params[["max_length"]]
max_mismatch <- snakemake@params[["max_mismatch"]]
chimera <- snakemake@params[["chimera"]]

# logging
log = file(snakemake@log[[1]], open="wt")
sink(log)
sink(log, type = "message")

getN <- function(x) {
tryCatch(sum(getUniques(x)),
error=function(e){return(0)})
}


# DADA Workflow -------------------------------------------------------------------------------------------------------------

# Is there a more elegant solution here to handle no results cases?
# can't find a way to properly handle exceptions in R1

# Filter reads
names(filtFs) <- sample.names
names(filtRs) <- sample.names

out <- suppressWarnings(
withCallingHandlers(
expr=filterAndTrim(fnFs, filtFs, fnRs, filtRs,
maxN=0, maxEE=max_EE, rm.phix=TRUE,
compress=TRUE, multithread=threads, verbose=TRUE),
error=function(e) {message(e$message)},
warning=function(w) {message(w$message)
writeLines("", filtRs)
writeLines("", filtFs)
}
))

# Learn Error rate
tryCatch({
# Filter reads
names(filtFs) <- sample.names
names(filtRs) <- sample.names
out <- filterAndTrim(fnFs, filtFs, fnRs, filtRs,
maxN=0, maxEE=max_EE, rm.phix=TRUE,
compress=TRUE, multithread=threads, verbose=TRUE)

# Learn Error rate
errF <- learnErrors(filtFs, multithread=threads, verbose=TRUE)
errR <- learnErrors(filtRs, multithread=threads, verbose=TRUE)

Expand Down Expand Up @@ -112,7 +151,6 @@ tryCatch({
writeLines(asfasta, chimeras_fasta)

# Report
getN <- function(x) sum(getUniques(x))
track <- cbind(sample.names,
out,
round((out[1]-out[2])/out[1]*100, 2),
Expand Down Expand Up @@ -147,15 +185,11 @@ tryCatch({
"Reads in ASVs [%]")
write.table(track, report, quote = FALSE, sep = "\t", row.names = FALSE)

}, error = function(c) {
}, error = function(e) {
warning(paste0("Error message: ", e$message))
warning("Terminated process on error.")
warning("Empty output files generated")

# Write empty files
if (!exists(filtRs)){
writeLines("", filtRs)
}
if (!exists(filtFs)){
writeLines("", filtFs)
}
if (!exists(errplotF)){
ggplot() +
theme_void() +
Expand Down Expand Up @@ -186,8 +220,39 @@ tryCatch({
writeLines("", asv_table)
}
if (!exists(report)){
writeLines("Sample Total reads Filtered reads Discarded reads [%] Denoised R1 Denoised R2 Merged Merging failures [%] ASV Size-filtered ASV Discarded ASVs [% ASV] Discarded ASVs [% of reads] Non-chimeric ASV Chimeras [% ASV] Chimeras [% of reads] Reads in ASVs Reads in ASVs [%]",
report)
track <- cbind(sample.names,
out,
round((out[1]-out[2])/out[1]*100, 2),
getN(dadaFs),
getN(dadaRs),
getN(mergers),
round((1-getN(mergers)/out[2])*100, 2),
0,
0,
0,
0,
0,
0,
0,
0,
0)
colnames(track) <- c("Sample",
"Total reads", "Filtered reads",
"Discarded reads [%]",
"Denoised R1",
"Denoised R2",
"Merged",
"Merging failures [%]",
"ASV",
"Size-filtered ASV",
"Discarded ASVs [% ASV]",
"Discarded ASVs [% of reads]",
"Non-chimeric ASV",
"Chimeras [% ASV]",
"Chimeras [% of reads]",
"Reads in ASVs",
"Reads in ASVs [%]")
write.table(track, report, quote = FALSE, sep = "\t", row.names = FALSE)
}
if (!exists(chimeras_fasta)){
writeLines("", chimeras_fasta)
Expand Down
4 changes: 4 additions & 0 deletions workflow/scripts/write_report.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -332,5 +332,9 @@ knitr::kable(soft_table)
## Help

[Link to the repository](https://github.com/CVUA-RRW/FooDMe)


[Report a bug or ask a question](https://github.com/CVUA-RRW/FooDMe/issues/new)


[Last release](https://github.com/CVUA-RRW/FooDMe/releases)

0 comments on commit 07e5d4e

Please sign in to comment.