In [None]:
library(DESeq2)
library(gplots)
library(RColorBrewer)
library(calibrate)
library(vsn)
library(pheatmap)
library(ggplot2)

options(repr.plot.width=12, repr.plot.height=12)
pacman::p_load(here,  
               tidyverse, 
               janitor, # Cleaning column names  
               scales, # Transform axis scales   
               ggrepel) # Optimise plot label separation 

## Build count matrix

In [None]:
# Read the count table from CSV (first column becomes the row names) and
# coerce it to a matrix. head() previews the first rows.
countdata <- as.matrix(
  read.csv("../results/count_matrix_DMD.csv", header = TRUE, row.names = 1)
)
head(countdata)

In [None]:
# Condition labels: four "mdx" entries followed by three "dmd" entries.
# NOTE(review): this assumes the columns of `countdata` are ordered
# mdx x4, then dmd x3 -- confirm against the CSV header.
condition <- factor(rep(c("mdx", "dmd"), times = c(4, 3)))
condition

# Sample annotation table: one row per column of `countdata`.
coldata <- data.frame(condition, row.names = colnames(countdata))
coldata

## Initialize `DDS` object and perform fitting

In [None]:
# Bundle counts and sample annotation into a DESeqDataSet whose design
# formula contains the single factor `condition`.
dds <- DESeqDataSetFromMatrix(
  countData = countdata,
  colData = coldata,
  design = ~ condition
)

# Use "mdx" as the reference level of the condition factor.
dds$condition <- relevel(dds$condition, ref = "mdx")

# Pre-filter: keep only rows whose counts sum to at least 10 across samples.
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]
dds

In [None]:
# Fit the DESeq2 model; the fitted object replaces `dds`.
dds <- DESeq(dds)

## Mean-SD plots for the shifted-log, `VST` and `rlog` transformations

In [None]:
# Three transformed versions of the fitted data set. `blind = FALSE` is
# passed to both vst() and rlog().
ntd <- normTransform(dds)
vsd <- vst(dds, blind = FALSE)
rld <- rlog(dds, blind = FALSE)

# Mean-SD plot for the normTransform() output.
meanSdPlot(x = assay(ntd))

In [None]:
# Mean-SD plot for the vst() output.
meanSdPlot(x = assay(vsd))

In [None]:
# Mean-SD plot for the rlog() output.
meanSdPlot(x = assay(rld))

## Plot dispersion estimates

In [None]:
# Built-in DESeq2 dispersion plot for the fitted data set.
plotDispEsts(object = dds, main = "Dispersion plot")

In [None]:
# Per-row metadata columns of the fitted object as a plain data frame.
res.disp <- as.data.frame(mcols(dds, use.names = TRUE))

# ggplot2 rendering of the dispersion plot, all against baseMean on log-log
# axes. Layers are drawn in this order:
#   1. dispGeneEst -- small black rings
#   2. dispersion  -- large blue points, shape mapped to dispOutlier
#   3. dispFit     -- small red points
ggplot(res.disp, aes(x = baseMean, y = dispGeneEst)) +
  geom_point(shape = 21, color = "black", alpha = 0.5, size = 0.4) +
  geom_point(
    aes(y = dispersion, shape = dispOutlier),
    color = "#26b3ff", alpha = 0.25, size = 3.5
  ) +
  geom_point(aes(y = dispFit), color = "red", alpha = 0.24, size = 0.41) +
  scale_x_log10() +
  scale_y_log10() +
  labs(x = "mean of normalized counts", y = "Dispersion") +
  theme(text = element_text(size = 16))

## Sample-distance and top-gene heatmaps

In [None]:
# Sample-to-sample distances (dist() default metric) on the VST-transformed
# values; samples are columns of the assay, hence the transpose.
sampleDists <- dist(t(assay(vsd)))
sampleDistMatrix <- as.matrix(sampleDists)

# Row labels come from `vsd`, the same object the distances were computed
# from. The sample annotation only carries a `condition` column (there is no
# `type`), so each row is labelled "<condition>-<sample name>".
rownames(sampleDistMatrix) <- paste(vsd$condition, colnames(vsd), sep = "-")
colnames(sampleDistMatrix) <- NULL

# Reversed "Blues" ramp interpolated to 255 colours.
colors <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)

# Cluster rows and columns on the precomputed distances. `color` is spelled
# out in full instead of relying on partial argument matching.
pheatmap(sampleDistMatrix,
         clustering_distance_rows = sampleDists,
         clustering_distance_cols = sampleDists,
         color = colors, fontsize = 16)

In [None]:
# Row indices of the (up to) 20 genes with the highest mean normalized count.
# head() is used instead of `[1:20]` so that no NA indices are produced when
# fewer than 20 genes are present.
select <- head(
  order(rowMeans(counts(dds, normalized = TRUE)), decreasing = TRUE),
  20
)

# Heatmap of those genes on the normTransform() scale, without clustering,
# annotated with the sample table.
pheatmap(assay(ntd)[select, ],
         annotation_col = coldata,
         cluster_rows = FALSE, cluster_cols = FALSE,
         show_rownames = FALSE, fontsize = 16)

In [None]:
# Same selected genes on the vst() scale; no row or column clustering.
pheatmap(assay(vsd)[select, ],
         annotation_col = coldata,
         cluster_rows = FALSE, cluster_cols = FALSE,
         show_rownames = FALSE, fontsize = 16)

In [None]:
# Same selected genes on the rlog() scale; no row or column clustering.
pheatmap(assay(rld)[select, ],
         annotation_col = coldata,
         cluster_rows = FALSE, cluster_cols = FALSE,
         show_rownames = FALSE, fontsize = 16)

## MA plots

### `DGE` results

In [None]:
# Results table for the condition contrast "dmd" vs "mdx", default settings.
# NOTE(review): later cells threshold `res` at padj <= 0.05 although it is
# computed with the default `alpha`; the DESeq2 documentation suggests
# setting `alpha` to the cutoff actually used -- confirm this is intended.
res <- results(dds, contrast = c("condition", "dmd", "mdx"))

# Same contrast with alpha = 0.05, summarised below.
res05 <- results(dds, contrast = c("condition", "dmd", "mdx"), alpha = 0.05)
summary(res05)

In [None]:
# MA plot (log2 fold change against mean normalized count) drawn with ggplot2.
#
# Arguments:
#   res          Object that as.data.frame() turns into a data frame with
#                `baseMean`, `log2FoldChange` and `padj` columns.
#   ...          Accepted for call compatibility; currently unused.
#   padj_cutoff  Rows with padj <= padj_cutoff are flagged "yes"; all other
#                rows, including those with a missing padj, are flagged "no".
#   lfc_lines    y positions of the dashed horizontal guide lines.
#
# Returns the ggplot object.
ggplotMA <- function(res, ..., padj_cutoff = 0.05, lfc_lines = c(1, -1)) {
  stopifnot(is.numeric(padj_cutoff), length(padj_cutoff) == 1L)

  res <- as.data.frame(res)
  stopifnot(all(c("baseMean", "log2FoldChange", "padj") %in% names(res)))

  # A missing padj must never count as significant, so test is.na() first.
  flagged <- !is.na(res[["padj"]]) & res[["padj"]] <= padj_cutoff
  sig <- rep("no", nrow(res))
  sig[flagged] <- "yes"
  res$sig <- sig

  # Per-flag appearance: significant points are blue, larger and opaque.
  cols <- c("yes" = "#26b3ff", "no" = "grey")
  sizes <- c("yes" = 2.5, "no" = 1.5)
  alphas <- c("yes" = 1, "no" = 0.5)

  ggplot(res, aes(x = baseMean,
                  y = log2FoldChange,
                  fill = sig,
                  size = sig,
                  alpha = sig)) +
    geom_point(shape = 21, color = "black") +
    xlab("mean of normalized counts") +
    ylab("log2 Fold Change") +
    geom_hline(yintercept = lfc_lines, linetype = "dashed") +
    scale_x_log10() +
    scale_fill_manual(values = cols) +
    scale_size_manual(values = sizes) +
    scale_alpha_manual(values = alphas) +
    theme(text = element_text(size = 16))
}

In [None]:
# Built-in MA plot of the unshrunken results, y axis limited to [-2, 2].
plotMA(object = res, ylim = c(-2, 2))

In [None]:
# ggplot2 MA plot of the unshrunken results.
ggplotMA(res = res)

In [None]:
# Shrunken log2 fold changes for the "condition_dmd_vs_mdx" coefficient,
# using the "apeglm" shrinkage type.
resLFC <- lfcShrink(dds, coef = "condition_dmd_vs_mdx", type = "apeglm")

# Built-in MA plot of the shrunken results, y axis limited to [-2, 2].
plotMA(object = resLFC, ylim = c(-2, 2))

In [None]:
# ggplot2 MA plot of the shrunken results.
ggplotMA(res = resLFC)

## `PCA` and volcano plots

In [None]:
# PCA coordinates from the rlog-transformed data, returned as a data frame
# instead of a finished plot.
pcaData <- plotPCA(rld, intgroup = "condition", returnData = TRUE)

# Variance percentages, read from the "percentVar" attribute and rounded to
# whole numbers for the axis labels.
percentVar <- round(100 * attr(pcaData, "percentVar"))

ggplot(pcaData, aes(x = PC1, y = PC2, color = condition)) +
  geom_point(size = 8, alpha = 0.5) +
  labs(
    x = paste0("PC1: ", percentVar[1], "% variance"),
    y = paste0("PC2: ", percentVar[2], "% variance")
  ) +
  theme(text = element_text(size = 16))
  

In [None]:
# Histogram of the raw p-values in 50 bins.
hist(x = res$pvalue, breaks = 50, col = "grey")

In [None]:
# Join the results table with the normalized counts on their row names, then
# classify each gene:
#   "up"   -- padj <= 0.05 and log2FoldChange >= 1
#   "down" -- padj <= 0.05 and log2FoldChange <= -1
#   "ns"   -- everything else, including rows where a comparison is NA
resdata <- merge(
  as.data.frame(res),
  as.data.frame(counts(dds, normalized = TRUE)),
  by = "row.names",
  sort = FALSE
) %>%
  mutate(
    gene_type = case_when(
      padj <= 0.05 & log2FoldChange >= 1 ~ "up",
      padj <= 0.05 & log2FoldChange <= -1 ~ "down",
      TRUE ~ "ns"
    )
  )

# Number of genes in each class.
count(resdata, gene_type)

In [None]:
# Preview the first six rows of the merged table.
head(resdata, n = 6L)

In [None]:
# Appearance per gene class: up/down genes are coloured, larger and opaque;
# non-significant genes are grey, smaller and semi-transparent.
cols <- c("up" = "#ffad73", "down" = "#26b3ff", "ns" = "grey")
sizes <- c("up" = 5, "down" = 5, "ns" = 2.5)
alphas <- c("up" = 1, "down" = 1, "ns" = 0.5)

# Volcano plot: log2 fold change against -log10 of the adjusted p-value.
# Dashed guides mark padj = 0.05 and a two-fold change in either direction.
ggplot(resdata, aes(x = log2FoldChange,
                    y = -log10(padj),
                    fill = gene_type,
                    size = gene_type,
                    alpha = gene_type)) +
  geom_point(shape = 21, color = "black") +
  # The axis shows a base-10 log, so the label says so explicitly.
  ylab("-log10(adj pvalue)") +
  xlab("log2 Fold Change") +
  geom_hline(yintercept = -log10(0.05), linetype = "dashed") +
  geom_vline(xintercept = c(log2(0.5), log2(2)), linetype = "dashed") +
  scale_fill_manual(values = cols) +
  scale_size_manual(values = sizes) +
  scale_alpha_manual(values = alphas) +
  theme(text = element_text(size = 16))

## Save `DGE` results

In [None]:
# Recompute the "dmd" vs "mdx" results and order rows by adjusted p-value.
res <- results(dds, contrast = c("condition", "dmd", "mdx"))
res <- res[order(res$padj), ]

# Attach the normalized counts by row name; the key column produced by
# merge() is renamed to "Gene".
resdata <- merge(
  as.data.frame(res),
  as.data.frame(counts(dds, normalized = TRUE)),
  by = "row.names",
  sort = FALSE
)
colnames(resdata)[1] <- "Gene"

# NOTE(review): write.csv() also writes the automatic row index as a first
# column -- confirm that downstream readers expect it.
write.csv(x = resdata, file = "../results/DMD.deseq.csv")
head(resdata)