### DESEQ2 analysis of AD/PD dataset

In [1]:
rm(list=ls())
#load necessary libraries 
library(ggplot2)
library(DESeq2)
library("BiocParallel")
parallelFlag=TRUE
register(MulticoreParam(50))
library("IHW")
library("pheatmap")
library(sva)
library(limma)

Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics
Loading required package: parallel

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:parallel’:

    clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
    clusterExport, clusterMap, parApply, parCapply, parLapply,
    parLapplyLB, parRapply, parSapply, parSapplyLB

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, append, as.data.frame, basename, cbind, colMeans,
    colnames, colSums, dirname, do.call, duplicated, eval, evalq,
    Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply,
    lengths, Map, mapply, match, mget, order, paste, pmax, pmax.int,
    pmin, pmin.int, Position, rank, rbind, Reduce, rowMeans, rownames,
    rowSums, sapply, setdiff, sort, table, tapply, union, unique,
    unsplit, which, which.max, which

## Load data and design

In [2]:
#load ATAC-seq raw read counts
data=read.table('../adpd.atac.idr.counts.txt.gz',header=TRUE,sep='\t')
#concatenate chrom/start/end columns values to server as rownames for the dataframe of the form chrom_start_end 
rownames(data)=paste(data$chrom,data$start,data$end,sep="_")
data$chrom=NULL
data$start=NULL
data$end=NULL

data=data[rowSums(data)>0,]


In [4]:
#load the metadata
batches=read.table("../batches.filtered.csv",header=TRUE,sep='\t')

In [7]:
Grouping <- factor(paste0(batches$Cohort,".",batches$RegionMod, ".", batches$TypeMod))
batches$Grouping=Grouping

In [8]:
#SVA can't handle NA values, so we have no choice but to interpolate to the mode for missing entries in PMI & ApoE 
batches$ApoE[is.na(batches$ApoE)]='3_3'
batches$PMI[is.na(batches$PMI)]=mean(na.omit(batches$PMI))

## Create the DESeq2 Object

In [17]:
#Create DESeq object
dds <- DESeqDataSetFromMatrix(countData = data,
                              colData = batches,
                              design = ~Grouping+Gender+expired_age+PMI+ApoE)#+TissueCenter +Batch
#TissueCenter and Batch are confounded


  the design formula contains a numeric variable with integer values,
  specifying a model with increasing fold change for higher values.
  did you mean for this to be a factor? if so, first convert
  this variable to a factor using the factor() function


## Differential Accessibility Operation

In [None]:
#Run the differential analysis
dds <- DESeq(dds,parallel = TRUE)

estimating size factors
estimating dispersions
gene-wise dispersion estimates: 50 workers
mean-dispersion relationship
final dispersion estimates, fitting model and testing: 50 workers


## Standard BH Correction (no optimal thresholding) 

In [20]:
res=results(dds)
summary(res)

res=results(dds,independentFiltering=FALSE)
summary(res)

res=results(dds,filterFun = ihw)
summary(res)


out of 385725 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 1216, 0.32%
LFC < 0 (down)     : 2535, 0.66%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results


out of 385725 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 1216, 0.32%
LFC < 0 (down)     : 2535, 0.66%
outliers [1]       : 0, 0%
low counts [2]     : 0, 0%
(mean count < 0)
[1] see 'cooksCutoff' argument of ?results
[2] see 'independentFiltering' argument of ?results


out of 385725 with nonzero total read count
adjusted p-value < 0.1
LFC > 0 (up)       : 1062, 0.28%
LFC < 0 (down)     : 9967, 2.6%
outliers [1]       : 0, 0%
[1] see 'cooksCutoff' argument of ?results
see metadata(res)$ihwResult on hypothesis weighting



In [21]:
resultsNames(dds)

In [30]:
comparisons=c("pd_caud_adpd_vs_lopd",
    "pd_caud_adpd_vs_ctrl",
    "pd_caud_lopd_vs_ctrl",
    "pd_hipp_adpd_vs_lopd",
    "pd_hipp_adpd_vs_ctrl",
    "pd_hipp_lopd_vs_ctrl",
    "pd_mdfg_adpd_vs_lopd",
    "pd_mdfg_adpd_vs_ctrl",
    "pd_mdfg_lopd_vs_ctrl",
    "pd_ptmn_adpd_vs_lopd",
    "pd_ptmn_adpd_vs_ctrl",
    "pd_ptmn_lopd_vs_ctrl",
    "pd_smtg_adpd_vs_lopd",
    "pd_smtg_adpd_vs_ctrl",
    "pd_smtg_lopd_vs_ctrl",
    "pd_suni_adpd_vs_lopd",
    "pd_suni_adpd_vs_ctrl",
    "pd_suni_lopd_vs_ctrl")


In [37]:
contrasts=list(c("Grouping_PD.CAUD.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.CAUD.LOPD_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.CAUD.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.CAUD.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.CAUD.LOPD_vs_AD.CAUD.ADAD","Grouping_PD.CAUD.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.HIPP.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.HIPP.LOPD_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.HIPP.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.HIPP.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.HIPP.LOPD_vs_AD.CAUD.ADAD","Grouping_PD.HIPP.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.MDFG.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.MDFG.LOPD_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.MDFG.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.MDFG.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.MDFG.LOPD_vs_AD.CAUD.ADAD","Grouping_PD.MDFG.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.PTMN.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.PTMN.LOPD_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.PTMN.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.PTMN.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.PTMN.LOPD_vs_AD.CAUD.ADAD","Grouping_PD.PTMN.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.SMTG.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.SMTG.LOPD_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.SMTG.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.SMTG.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.SMTG.LOPD_vs_AD.CAUD.ADAD","Grouping_PD.SMTG.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.SUNI.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.SUNI.LOPD_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.SUNI.ADPD_vs_AD.CAUD.ADAD","Grouping_PD.SUNI.CTRL_vs_AD.CAUD.ADAD"),
            c("Grouping_PD.SUNI.LOPD_vs_AD.CAUD.ADAD","Grouping_PD.SUNI.CTRL_vs_AD.CAUD.ADAD"))

In [39]:
pval_thresh=0.05
lfc_thresh=1

In [None]:
##get the results for the various contrasts 
numcomparisons=length(contrasts)
for(i in seq(1,numcomparisons))
{
 res=results(dds,contrast=contrasts[i],parallel=TRUE) 
 res$logPadj=-1*log10(res$padj)
 res=as.data.frame(res)
 res=na.omit(res)
 res$sig=res$padj<=pval_thresh & abs(res$log2FoldChange)>lfc_thresh
    
 #extract the differential peaks 
 sigsubset=res[res$sig==TRUE,]
 sig=nrow(sigsubset)
 up=sum(sigsubset$log2FoldChange>0)
 down=sum(sigsubset$log2FoldChange<0)
 curtitle=paste(comparisons[i],'\n','sig:',sig,'\n','up:',up,'\n','down:',down,'\n')   
 print(curtitle)
    
 #generate a volcano plot 
 png(paste("volcano_diff",comparisons[i],".png",sep=""))
 print(ggplot(data=res,
               aes(y=res$logPadj,x=res$log2FoldChange,color=res$sig))+
               geom_point(alpha=0.1)+
               xlab("log2(FC)")+
               ylab("-log10(pval)")+
               theme_bw()+
               scale_color_manual(values=c("#000000","#FF0000"))+
               ggtitle(curtitle))
  dev.off() 
  #write differential peaks to a TSV file 
  write.table(sigsubset,file=paste("diff_",comparisons[i],".tsv",sep=""),
              quote=FALSE,sep='\t',row.names = TRUE,col.names = TRUE)
}

[1] "pd_caud_adpd_vs_lopd \n sig: 223506 \n up: 112586 \n down: 110920 \n"


## Repeat analysis with Type and Region 

In [None]:
Grouping <- factor(paste0(batches$Cohort,".",batches$Region, ".", batches$Type))
batches$Grouping=Grouping

In [None]:
#Create DESeq object
dds2 <- DESeqDataSetFromMatrix(countData = data,
                              colData = batches,
                              design = ~Grouping+Gender+expired_age+PMI+ApoE)#TissueCenter +Batch

In [None]:
#Run the differential analysis
dds2 <- DESeq(dds2,parallel = TRUE)

In [None]:
comparisons=c("pd_caud_gba1_vs_lrrk",
    "pd_caud_gba1_vs_spor",
    "pd_caud_gba1_vs_ctrl",
    "pd_caud_lrrk_vs_spor",
    "pd_caud_lrrk_vs_ctrl",
    "pd_caud_spor_vs_ctrl",
    "pd_hipp_gba1_vs_lrrk",
    "pd_hipp_gba1_vs_spor",
    "pd_hipp_gba1_vs_ctrl",
    "pd_hipp_lrrk_vs_spor",
    "pd_hipp_lrrk_vs_ctrl",
    "pd_hipp_spor_vs_ctrl",
    "pd_mdfg_gba1_vs_lrrk",
    "pd_mdfg_gba1_vs_spor",
    "pd_mdfg_gba1_vs_ctrl",
    "pd_mdfg_lrrk_vs_spor",
    "pd_mdfg_lrrk_vs_ctrl",
    "pd_mdfg_spor_vs_ctrl",
    "pd_mdtg_gba1_vs_lrrk",
    "pd_mdtg_gba1_vs_spor",
    "pd_mdtg_gba1_vs_ctrl",
    "pd_mdtg_lrrk_vs_spor",
    "pd_mdtg_lrrk_vs_ctrl",
    "pd_mdtg_spor_vs_ctrl",
    "pd_ptmn_gba1_vs_lrrk",
    "pd_ptmn_gba1_vs_spor",
    "pd_ptmn_gba1_vs_ctrl",
    "pd_ptmn_lrrk_vs_spor",
    "pd_ptmn_lrrk_vs_ctrl",
    "pd_ptmn_spor_vs_ctrl",
    "pd_suni_gba1_vs_lrrk",
    "pd_suni_gba1_vs_spor",
    "pd_suni_gba1_vs_ctrl",
    "pd_suni_lrrk_vs_spor",
    "pd_suni_lrrk_vs_ctrl",
    "pd_suni_spor_vs_ctrl")


In [None]:
contrasts=list(c("Grouping_PD.CAUD.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.CAUD.LRRK_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.CAUD.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.CAUD.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.CAUD.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.CAUD.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.CAUD.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.CAUD.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.CAUD.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.CAUD.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.CAUD.SPOR_vs_AD.CAUD.ADAD","Grouping_PD.CAUD.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.HIPP.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.HIPP.LRRK_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.HIPP.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.HIPP.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.HIPP.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.HIPP.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.HIPP.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.HIPP.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.HIPP.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.HIPP.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.HIPP.SPOR_vs_AD.CAUD.ADAD","Grouping_PD.HIPP.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDFG.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.MDFG.LRRK_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDFG.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.MDFG.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDFG.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.MDFG.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDFG.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.MDFG.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDFG.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.MDFG.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDFG.SPOR_vs_AD.CAUD.ADAD","Grouping_PD.MDFG.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDTG.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.MDTG.LRRK_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDTG.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.MDTG.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDTG.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.MDTG.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDTG.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.MDTG.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDTG.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.MDTG.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.MDTG.SPOR_vs_AD.CAUD.ADAD","Grouping_PD.MDTG.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.PTMN.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.PTMN.LRRK_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.PTMN.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.PTMN.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.PTMN.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.PTMN.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.PTMN.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.PTMN.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.PTMN.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.PTMN.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.PTMN.SPOR_vs_AD.CAUD.ADAD","Grouping_PD.PTMN.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.SUNI.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.SUNI.LRRK_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.SUNI.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.SUNI.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.SUNI.GBA1_vs_AD.CAUD.ADAD","Grouping_PD.SUNI.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.SUNI.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.SUNI.SPOR_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.SUNI.LRRK_vs_AD.CAUD.ADAD","Grouping_PD.SUNI.CTRL_vs_AD.CAUD.ADAD"),
    c("Grouping_PD.SUNI.SPOR_vs_AD.CAUD.ADAD","Grouping_PD.SUNI.CTRL_vs_AD.CAUD.ADAD"))


In [None]:
##get the results for the various contrasts 
numcomparisons=length(contrasts)
for(i in seq(1,numcomparisons))
{
 res=results(dds2,contrast=contrasts[i],parallel=TRUE) 
 res$logPadj=-1*log10(res$padj)
 res=as.data.frame(res)
 res=na.omit(res)
 res$sig=res$padj<=pval_thresh & abs(res$log2FoldChange)>lfc_thresh
    
 #extract the differential peaks 
 sigsubset=res[res$sig==TRUE,]
 sig=nrow(sigsubset)
 up=sum(sigsubset$log2FoldChange>0)
 down=sum(sigsubset$log2FoldChange<0)
 curtitle=paste(comparisons[i],'\n','sig:',sig,'\n','up:',up,'\n','down:',down,'\n')   
 print(curtitle)
    
 #generate a volcano plot 
 png(paste("expanded_volcano_diff",comparisons[i],".png",sep=""))
 print(ggplot(data=res,
               aes(y=res$logPadj,x=res$log2FoldChange,color=res$sig))+
               geom_point(alpha=0.1)+
               xlab("log2(FC)")+
               ylab("-log10(pval)")+
               theme_bw()+
               scale_color_manual(values=c("#000000","#FF0000"))+
               ggtitle(curtitle))
  dev.off() 
  #write differential peaks to a TSV file 
  write.table(sigsubset,file=paste("expanded_diff_",comparisons[i],".tsv",sep=""),
              quote=FALSE,sep='\t',row.names = TRUE,col.names = TRUE)
}

In [None]:
#store dds object so it can be loaded readily in the future 
save(dds,dds2 file = "DESEQ2.model.noSVA.RData")