In [2]:
### Load library -----
library(DESeq2)
library(tidyverse)

In [4]:
### import GO DB -----
# Load the GO annotation table. The file is semicolon-separated and has no
# header row, so read.csv() names the columns V1, V2, V3, V4, ...
# The first four columns are appended as character columns under descriptive
# names (Gene, geneabb, genename, GO) and the auto-named originals are dropped.
GOdata <- read.csv("Reference csv/Cleaned_GO_4.csv", sep = ";",
                   header = FALSE) %>%
  dplyr::mutate(
    Gene = as.character(V1),
    geneabb = as.character(V2),
    genename = as.character(V3),
    GO = as.character(V4)
  ) %>%
  dplyr::select(-V1, -V2, -V3, -V4)

### featureCount data import -----

# Read one featureCounts table and its sample sheet, label the count columns
# with the sample conditions, and return both sorted by the sheet's `index`.
#
# Arguments:
#   count_file   - path to a featureCounts output table (must contain a
#                  `Geneid` column, which becomes the row names).
#   saminfo_file - path to a CSV sample sheet with `condition` and `index`
#                  columns, one row per count column, listed in the same order
#                  as the count columns appear in `count_file`.
#
# Returns a list with:
#   counts  - count matrix (genes x samples), columns ordered by `index`.
#   saminfo - the sample sheet, rows ordered by `index` (same order as
#             the columns of `counts`).
import_featurecounts <- function(count_file, saminfo_file) {
  counts <- read.table(count_file, header = TRUE, row.names = "Geneid")

  # With Geneid used as row names, the first five remaining columns are
  # dropped so that only the per-sample columns are left. (In standard
  # featureCounts output these are Chr, Start, End, Strand, Length.)
  stopifnot(ncol(counts) > 5)
  counts <- counts[, -seq_len(5), drop = FALSE]

  # Strip the "_a.bam" suffix from sample names and the "gene:" prefix from
  # gene IDs. The dot is escaped so that only a literal ".bam" is removed.
  colnames(counts) <- sub("_a\\.bam$", "", colnames(counts))
  rownames(counts) <- gsub("gene:", "", rownames(counts))

  # Show the cleaned sample names so their order can be compared by eye with
  # the sample sheet before they are replaced by condition labels.
  message("Samples in ", count_file, ": ",
          paste(colnames(counts), collapse = ", "))

  saminfo <- read.csv(saminfo_file)
  stopifnot(
    all(c("condition", "index") %in% names(saminfo)),
    nrow(saminfo) == ncol(counts),
    !anyNA(saminfo$index)
  )

  # Relabel the count columns with the condition of each sample.
  colnames(counts) <- as.character(saminfo$condition)

  # Apply ONE permutation to both objects so they stay aligned. Sorting the
  # sheet first and then subsetting the counts with the already-sorted index
  # values would leave the counts in their original order.
  # Selecting columns of a data frame also makes repeated condition labels
  # unique (e.g. DMSO_WT, DMSO_WT.1, DMSO_WT.2).
  ord <- order(saminfo$index)
  counts <- counts[, ord, drop = FALSE]
  saminfo <- saminfo[ord, , drop = FALSE]
  rownames(saminfo) <- NULL

  list(counts = as.matrix(counts), saminfo = saminfo)
}

# Wild-type samples
imported_wt <- import_featurecounts(
  "featureCounts/01.Maya1_WT_leaf",
  "Saminfo/01.Saminfo_Maya1_leaf_WT.csv"
)
countdata_wt <- imported_wt$counts
saminfo_wt <- imported_wt$saminfo

# fls2 samples
imported_fls2 <- import_featurecounts(
  "featureCounts/02.Maya1_fls2_leaf",
  "Saminfo/02.Saminfo_Maya1_leaf_fls2.csv"
)
countdata_fls2 <- imported_fls2$counts
saminfo_fls2 <- imported_fls2$saminfo

# Inspect the final column labels and the first rows of each count matrix
colnames(countdata_wt)
colnames(countdata_fls2)
head(countdata_wt)
head(countdata_fls2)


Unnamed: 0,DMSO_WT,DMSO_WT.1,DMSO_WT.2,Maya1_WT,Maya1_WT.1,Maya1_WT.2
AT1G01010,18,20,15,41,37,41
AT1G01020,85,127,110,101,127,100
AT1G03987,4,2,0,0,0,0
AT1G01030,32,9,45,17,12,31
AT1G01040,110,241,184,140,153,129
AT1G03993,0,0,0,0,0,0


Unnamed: 0,DMSO_fls2,DMSO_fls2.1,DMSO_fls2.2,Maya1_fls2,Maya1_fls2.1,Maya1_fls2.2
AT1G01010,23,29,16,41,48,30
AT1G01020,98,103,63,101,114,48
AT1G03987,0,0,0,0,0,0
AT1G01030,17,10,11,31,22,24
AT1G01040,136,126,83,165,159,113
AT1G03993,0,0,0,0,0,0


In [5]:
### DEseq analysis -----
# Runs DESeq2 separately for the WT and fls2 count matrices, compares Maya1
# against DMSO within each genotype, attaches normalized counts and GO
# annotation to the results, and writes one CSV per genotype.

# Condition factors for DESeq2: three DMSO replicates followed by three Maya1
# replicates for each genotype.
condition_wt <- factor(c(rep("DMSO_WT", 3), rep("Maya1_WT", 3)))
condition_fls2 <- factor(c(rep("DMSO_fls2", 3), rep("Maya1_fls2", 3)))

# The factors above are positional, so make sure the count matrix columns
# really are in that order. Column labels may carry a ".1", ".2", ... suffix
# added to make repeated condition names unique; strip it before comparing.
stopifnot(
  identical(sub("\\.[0-9]+$", "", colnames(countdata_wt)),
            as.character(condition_wt)),
  identical(sub("\\.[0-9]+$", "", colnames(countdata_fls2)),
            as.character(condition_fls2))
)

# Record the count-matrix column label of each sample in the sample sheet and
# build the colData tables (row names = count matrix column names).
saminfo_wt$colnames_count <- colnames(countdata_wt)
coldata_wt <- data.frame(row.names = colnames(countdata_wt), condition_wt)

saminfo_fls2$colnames_count <- colnames(countdata_fls2)
coldata_fls2 <- data.frame(row.names = colnames(countdata_fls2),
                           condition_fls2)

# Chem screening: fit the DESeq2 model per genotype, then relabel the samples
# with their plain condition names.
dds_wt <- DESeqDataSetFromMatrix(countData = countdata_wt,
                                 colData = coldata_wt,
                                 design = ~condition_wt)
dds_wt <- DESeq(dds_wt)
colnames(dds_wt) <- condition_wt

dds_fls2 <- DESeqDataSetFromMatrix(countData = countdata_fls2,
                                   colData = coldata_fls2,
                                   design = ~condition_fls2)
dds_fls2 <- DESeq(dds_fls2)
colnames(dds_fls2) <- condition_fls2

# contrast = c(factor, numerator, denominator): Maya1 relative to DMSO.
res_wt <- results(dds_wt,
                  contrast = c("condition_wt", "Maya1_WT", "DMSO_WT"))
res_fls2 <- results(dds_fls2,
                    contrast = c("condition_fls2", "Maya1_fls2", "DMSO_fls2"))

# Number of genes below / above the adjusted p-value cut-off of 0.1
# (genes with NA padj are not counted by table()).
table(res_wt$padj < 0.1)
table(res_fls2$padj < 0.1)

resDF_wt <- as.data.frame(res_wt)
resDF_fls2 <- as.data.frame(res_fls2)

# Attach the normalized counts to the statistics, matching on gene ID
# (the row names of both tables).
output_wt <- merge(resDF_wt,
                   as.data.frame(counts(dds_wt, normalized = TRUE)),
                   by = "row.names", sort = FALSE)
output_fls2 <- merge(resDF_fls2,
                     as.data.frame(counts(dds_fls2, normalized = TRUE)),
                     by = "row.names", sort = FALSE)

names(output_wt)[1] <- "Gene"
names(output_fls2)[1] <- "Gene"

# Add GO annotation. merge() keeps only genes present in both tables, so
# genes without an entry in GOdata are dropped from the output.
output_wt <- merge(GOdata, output_wt, by = "Gene")
output_fls2 <- merge(GOdata, output_fls2, by = "Gene")

# Make sure the output directory exists before writing.
dir.create("DESeq2", showWarnings = FALSE, recursive = TRUE)
write.csv(output_wt, file = "DESeq2/DEseq2_results_WT_Maya1_leaf.csv")
write.csv(output_fls2, file = "DESeq2/DEseq2_results_fls2_Maya1_leaf.csv")

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing

estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing




FALSE  TRUE 
10823   451 


FALSE  TRUE 
12486   211 

In [6]:
### Generate final table -----
# Combine the WT and fls2 result tables gene by gene. Columns that exist in
# both inputs get the suffix ".x" (from output_wt) or ".y" (from output_fls2).
Total <- merge(output_wt, output_fls2, by = "Gene")

colnames(Total)

# Keep the gene annotation, the fold change and adjusted p-value of each
# genotype, the per-sample count columns, and the GO column, in that order.
Total <- dplyr::select(
  Total,
  Gene, geneabb.x, genename.x,
  log2FoldChange.x, padj.x,
  log2FoldChange.y, padj.y,
  starts_with("DMSO_WT"), starts_with("Maya1_WT"),
  starts_with("DMSO_fls2"), starts_with("Maya1_fls2"),
  GO.x
)
colnames(Total)

# Final column names, assigned by position: 7 annotation/statistics columns,
# then 4 sample groups x 3 replicates, then GO.
replicate_names <- paste0(
  rep(c("DMSO_WT", "Maya1_WT", "DMSO_fls2", "Maya1_fls2"), each = 3),
  "_rep", 1:3
)
colnames(Total) <- c(
  "Gene", "Gene_Symbol", "Gene_Name",
  "Log2FC_WT", "Padj_WT",
  "Log2FC_fls2", "Padj_fls2",
  replicate_names,
  "GO"
)
colnames(Total)

write.csv(Total, file = "Total/Total_Maya1_leaf.csv")
