Skip to content

Commit

Permalink
fixed some problems + started to implement quantile normalisation (pr…
Browse files Browse the repository at this point in the history
…oblems)
  • Loading branch information
jvanheld committed Jul 29, 2018
1 parent 1b3f2ff commit 1f554eb
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 31 deletions.
33 changes: 21 additions & 12 deletions scripts/RSnakeChunks/R/normalise_count_table.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
#' @title Between-sample normalisation of RNA-seq count table.
#'
#'
#' @author Jacques van Helden and Mustafa AbuElqumsan
#'
#' @description Run between-sample normalization of an RNA-seq count table with a variety of methods (including edgeR and DESeq2 methods).
Expand Down Expand Up @@ -92,7 +92,7 @@ NormalizeCountTable <- function(counts,
epsilon = epsilon
)

#### Measure size of input matrix ####
## ---- Measure size of input matrix ----
result$raw.features <- nrow(counts)
result$raw.samples <- ncol(counts)

Expand All @@ -101,7 +101,7 @@ NormalizeCountTable <- function(counts,
"\tRaw counts: ", result$raw.features, " features x ", result$raw.samples, " samples. ")
}

#### Compute non-zero counts ####
## ---- Compute non-zero counts ----
counts.nozero <- counts
counts.nozero[counts == 0] <- NA
if (nozero) {
Expand All @@ -115,7 +115,7 @@ NormalizeCountTable <- function(counts,
}


#### Compute sample-wise statistics ####
## ---- Compute sample-wise statistics ----
sampleStats <- data.frame(
zeros = apply(counts == 0, 2, sum),
na.values = apply(is.na(counts), 2, sum),
Expand Down Expand Up @@ -151,10 +151,10 @@ NormalizeCountTable <- function(counts,
result$sampleStats <- sampleStats
# head(self$sampleStats)

#### Apply normalization method(s) ####
## ---- Apply normalization method(s) ----
method.names <- vector()
for (m in method) {
#### Define method name ####
## Define method name
if (m == "percentile") {
if (!exists("percentile")) {
stop("NormalizeSamples()\tMissing required parameter: standardization percentile")
Expand Down Expand Up @@ -276,6 +276,13 @@ NormalizeCountTable <- function(counts,
scaling.factor <- 1/size.factor
# plot(size.factor, sampleStats$sum) ## THE DIFFERENCE IS QUITE IMPRESSIVE

} else if (m == "quantiles") {
## Normalize the count table using limma::normalizeQuantiles() function
require("limma")
normCounts <- limma::normalizeQuantiles(A = counts.to.norm, ties = TRUE)
size.factor <- NA
scaling.factor <- NA

} else if (m == "VSD") {
# Compute variance stabilizing transformations (VST) via DESeq2 (Tibshirani 1988; Huber et al. 2003; Anders and Huber 2010)
if (verbose >= 3) {
Expand Down Expand Up @@ -306,7 +313,7 @@ NormalizeCountTable <- function(counts,
stop(method, " is not a valid method for NormalizeSamples()")
}

#### Discarded samples ####
## ---- Discarded samples ## ----
## Detect problems related to null scaling factors, which may happen in some datasets due to a very large number of zeros.
discardedSamples <-
(size.factor == 0) |
Expand All @@ -321,12 +328,14 @@ NormalizeCountTable <- function(counts,
}
}

#### Compute normalised counts ####
normTarget <- mean(scaling.factor[!discardedSamples]) ## Ensure library eize equality before and after standardization
scaling.factor <- scaling.factor * normTarget
normCounts <- t(t(counts.to.norm[, !discardedSamples]) * scaling.factor)
## ---- Compute normalised counts ----
if (!is.na(scaling.factor)) {
normTarget <- mean(scaling.factor[!discardedSamples]) ## Ensure library eize equality before and after standardization
scaling.factor <- scaling.factor * normTarget
normCounts <- t(t(counts.to.norm[, !discardedSamples]) * scaling.factor)
}

#### log2 transformation (if required) ####
## ---- log2 transformation (if required) ----
if (log2) {
normCounts <- log2(normCounts + epsilon)
}
Expand Down
73 changes: 54 additions & 19 deletions scripts/RSnakeChunks/misc/rna-seq_normalisation_tests.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,12 @@

## By default this script is invoked on the command line with options parsed from the arguments.
##
## Alternatively, the options can be defined separately, so that the script can be called from another R script.
## Alternatively, the options can be defined separately, so that the script can be called from another R script.
##
## Integration in a snakemake rule will be evaluated soon.
## Integration in a snakemake rule will be evaluated soon.

library('rmarkdown')
library('limma')

if (!exists("opt")) {
message("Reading parameters from the command line")
Expand Down Expand Up @@ -39,10 +40,11 @@ if (is.null(opt$verbose) ) {
opt$verbose = 1
}

## If not specified,
## If not specified,
if (is.null(opt$main_dir) ) {
opt$main_dir = getwd()
}
dir.main <- opt$main_dir

## Mandatory arguments
if (is.null(opt$config_file) ) {
Expand Down Expand Up @@ -94,8 +96,35 @@ message("\tCount table: ", count.table)
index.rmd <- "index.Rmd"
index.socket <- file(index.rmd)


index.text <- "# RNA-seq analysis report"
Rmd.header <- '---
title: "RNA-seq analysis report"
author:
name: "[AUTHOR NAME]"
email: "[AUTHOR EMAIL]"
date: Last update:`r format(Sys.time())`
output:
pdf_document:
fig_caption: yes
highlight: zenburn
toc: yes
toc_depth: 3
html_document:
code_folding: show
fig_caption: yes
highlight: zenburn
self_contained: yes
theme: cerulean
toc: yes
toc_depth: 3
toc_float: yes
word_document:
toc: yes
toc_depth: 2
---
'

index.text <- Rmd.header


## ---- knitr_setup, include=FALSE, eval=TRUE, echo=FALSE, warning=FALSE----
Expand Down Expand Up @@ -178,9 +207,9 @@ message("\tLoaded parameters from file ", configFile)
if (!is.null(parameters$dir$snakechunks)) {
dirs["SnakeChunks"] <- file.path(dirs["main"], parameters$dir$snakechunks)
message("\tSnakeChunks directory:\t", dirs["SnakeChunks"])

## ---- Load R functions from SnakeChunks --------------------

## NOTE: if the RSnakechunks package has been compiled on this machine
## the functions will be loaded with library('RSnakeChunks'). However
## for the ime being we do not assume that RSnakeChunks has been
Expand Down Expand Up @@ -315,12 +344,12 @@ thresholds <- parameters$DEG$thresholds

## Print the threshold tables
## NOTE: I should evaluate what I do with the kable calls
index.text <- append(index.text, "\n\n# Parameters\n")
index.text <- append(index.text, "\n\n## Parameters\n")
index.text <- append(
index.text,
index.text,
kable(t(as.data.frame(thresholds)), col.names = "Threshold",
caption = "Thresholds for the selection of differentially expressed genes. "))

outfiles["thresholds"] <- file.path(dir.tables.samples, "thresholds.tsv")
write.table(x = t(as.data.frame(thresholds)),
file = outfiles["thresholds"],
Expand Down Expand Up @@ -630,8 +659,14 @@ LibsizeBarplot(counts = filtered.counts, sample.labels = sample.desc$label, samp
silence <- dev.off(); rm(silence)


## ----normalisation-------------------------------------------------------
norm.methods <- c("none", "mean", "median", "percentile", "TMM", "DESeq2")
## ---- normalisation -------------------------------------------------------

if (is.null(parameters$DEG$norm_method)) {
#norm.methods <- c("none", "mean", "median", "percentile", "TMM", "DESeq2", "quantiles")
norm.methods <- c("none", "mean", "median", "percentile", "TMM", "DESeq2")
} else {
norm.methods <- parameters$DEG$norm_method
}
norm.comparison <- NormalizeCountTable(
counts = filtered.counts, class.labels = sample.conditions, nozero = TRUE,
method = norm.methods, percentile = 75, log2 = FALSE, epsilon = 0.1, detailed.sample.stats = TRUE,
Expand Down Expand Up @@ -765,7 +800,7 @@ for (i in 1:nrow(design)) {
edgeR.norm.methods <- c("TMM","RLE","upperquartile","none")
#edgeR.norm.methods <- c("TMM","RLE","upperquartile","none")
} else {
parameters$edgeR$norm_method
edgeR.norm.methods <- parameters$edgeR$norm_method
}
for (norm.method in edgeR.norm.methods) {

Expand Down Expand Up @@ -912,7 +947,7 @@ for (i in 1:nrow(design)) {
## system(paste("ls -ltr ", figfiles[prefix]))



## ---- Comparison between p-value histograms -----
## Draw Volcano plots
# deg.name <- "DESeq2"
Expand Down Expand Up @@ -970,19 +1005,19 @@ for (i in 1:nrow(design)) {

## ---- Index input / output files and directories -----
index <- data.frame()
index <- rbind(index,
index <- rbind(index,
data.frame(
type = "directory",
name = names(dirs),
path = dirs
))
index <- rbind(index,
index <- rbind(index,
data.frame(
type = "input file",
name = names(infiles),
path = infiles
))
index <- rbind(index,
index <- rbind(index,
data.frame(
type = "output file",
name = names(outfiles),
Expand Down Expand Up @@ -1044,9 +1079,9 @@ for (filename in names(figfiles)) {

index.text <- append(index.text, paste(sep = "", "\n\nJob done: ", Sys.time()))

writeLines(text = index.text, con = index.socked)
writeLines(text = index.text, con = index.socket)
close(index.socket)
rmarkdown::render(index.rmd)
rmarkdown::render(index.Rmd)


## ---- job_done ------------------------------------------------------------
Expand Down

0 comments on commit 1f554eb

Please sign in to comment.