In [2]:
### Load library -----
library(DESeq2)
library(tidyverse)


In [3]:
### import GO DB -----
# Read the semicolon-separated, header-less GO annotation table and give its
# first four columns descriptive names, stored as character vectors.
GOdata <- read.csv("Reference csv/Cleaned_GO_4.csv", sep = ";", header = FALSE)

# Default column name assigned by read.csv -> descriptive name used downstream.
go_column_names <- c(V1 = "Gene", V2 = "geneabb", V3 = "genename", V4 = "GO")
for (raw_name in names(go_column_names)) {
  # Append the renamed character copy, then drop the original column, so the
  # renamed columns end up in the order Gene, geneabb, genename, GO.
  GOdata[[go_column_names[[raw_name]]]] <- as.character(GOdata[[raw_name]])
  GOdata[[raw_name]] <- NULL
}

### featureCount data import -----
# Load the featureCounts table and its sample sheet, label the count columns
# with the sample-sheet conditions, and keep only the DMSO and flg22 samples
# as a numeric matrix (countdata_flg22) for DESeq2.

# Importing output of featurecounts; the Geneid column becomes the row names
countdata_flg22 <- read.table("FeatureCounts/03.DMSO_flg22_WT_Leaf", header=TRUE, row.names='Geneid' )
# Remove the first 5 columns, which are not count data
# (presumably the featureCounts Chr/Start/End/Strand/Length columns -- confirm).
# drop = FALSE keeps a data frame even if only one sample column remains.
countdata_flg22 <- countdata_flg22[, 6:ncol(countdata_flg22), drop = FALSE]
# Remove the trailing "_a.bam" from the sample names. The dot is escaped so
# that only a literal ".bam" suffix is stripped.
colnames(countdata_flg22) <- gsub("_a\\.bam$", "", colnames(countdata_flg22))
rownames(countdata_flg22) <- gsub("gene:", "", rownames(countdata_flg22))
colnames(countdata_flg22)
# Import sample information
saminfo_flg22 <- read.csv("Saminfo/03.Saminfo_DMSO_flg22_WT_Leaf.csv")
# Change column names using saminfo. The rename is purely positional (row i of
# the sample sheet labels count column i), so fail loudly when the two do not
# have the same number of entries instead of producing NA column names.
stopifnot(nrow(saminfo_flg22) == ncol(countdata_flg22))
colnames(countdata_flg22) <- saminfo_flg22$condition
colnames(countdata_flg22)
# Sort the sample sheet by its index column
saminfo_flg22 <- arrange(saminfo_flg22, index)
# NOTE(review): the sorted index values are used here as column positions. If
# index is a permutation of 1..ncol this leaves the column order unchanged;
# confirm whether order(index) on the unsorted sheet was intended.
countdata_flg22 <- countdata_flg22[,c(saminfo_flg22$index)]

# Select DMSO and flg22 only
countdata_flg22_df <- as.data.frame(countdata_flg22)
countdata_flg22_df <- countdata_flg22_df %>%
  dplyr::select(starts_with('DMSO'), starts_with('Z00_flg22'))
colnames(countdata_flg22_df)
# Change it to a matrix, the count input format used for DESeq2 below
countdata_flg22 <- as.matrix(countdata_flg22_df)
head(countdata_flg22)

Unnamed: 0,DMSO_X133955,DMSO_X133956,DMSO_X133957,Z00_flg22_X134018,Z00_flg22_X134020,Z00_flg22_X135612
AT1G01010,5276,3986,2540,2214,3526,3897
AT1G01020,2272,2077,1877,949,1052,1377
AT1G03987,31,19,28,9,3,14
AT1G01030,2000,1702,1339,516,267,300
AT1G01040,6000,4644,6160,2071,2395,3641
AT1G03993,0,0,0,0,0,0


In [6]:
### DEseq analysis -----
# Differential expression of flg22-treated (Z00_flg22) versus DMSO samples.
# Uses countdata_flg22, saminfo_flg22 and GOdata created in the earlier cells
# and writes the GO-annotated result table to the DESeq2/ directory.

# Define the condition factor for DESeq from the count-matrix column names, so
# it follows the actual number and order of samples instead of assuming
# exactly three replicates per condition.
sample_names_flg22 <- colnames(countdata_flg22)
is_dmso_flg22 <- startsWith(sample_names_flg22, "DMSO")
is_treated_flg22 <- startsWith(sample_names_flg22, "Z00_flg22")
stopifnot(
  all(is_dmso_flg22 | is_treated_flg22),
  any(is_dmso_flg22),
  any(is_treated_flg22)
)
condition_flg22 <- factor(ifelse(is_dmso_flg22, "DMSO", "Z00_flg22"))

# Trim saminfo to the samples kept in the count matrix
saminfo_flg22_trim <- saminfo_flg22 %>%
  filter(grepl("^DMSO", condition) | grepl("^Z00_flg22", condition))

saminfo_flg22_trim
# Put the count-matrix column names next to the sample sheet rows
saminfo_flg22_trim$colnames_count <- colnames(countdata_flg22)
coldata_flg22 <- data.frame(row.names=colnames(countdata_flg22), condition_flg22)

# Chem screening
dds_flg22 <- DESeqDataSetFromMatrix(countData=countdata_flg22, colData=coldata_flg22, design=~condition_flg22)
dds_flg22 <- DESeq(dds_flg22)
# The sample-level column names from countdata_flg22 are kept on dds_flg22.
# Relabelling the columns with condition_flg22 would give every replicate of a
# condition the same name and therefore duplicated column names in the
# exported table.

# Effect of Z00_flg22 relative to DMSO
res_flg22 <- results(dds_flg22, contrast=c('condition_flg22', 'Z00_flg22', 'DMSO'))

# Number of genes below / above the adjusted p-value cutoff of 0.1
table(res_flg22$padj<0.1)

resDF_flg22<-as.data.frame(res_flg22)

# Attach the normalized counts of every sample to the test results, keyed on
# the row names
output_flg22 <- merge(resDF_flg22, as.data.frame(counts(dds_flg22, normalized=TRUE)), by="row.names", sort=FALSE)

names(output_flg22)[1] <- "Gene"

# Add the GO annotation columns, matched on Gene
output_flg22<-merge(GOdata, output_flg22, by="Gene")

write.csv(output_flg22, file="DESeq2/DEseq2_results_flg22_leaf(DESeq_DMSO_flg22_only_selected).csv")



sample,condition,index
<chr>,<chr>,<int>
X133955,DMSO_X133955,13
X133956,DMSO_X133956,14
X133957,DMSO_X133957,15
X134018,Z00_flg22_X134018,53
X134020,Z00_flg22_X134020,54
X135612,Z00_flg22_X135612,81


estimating size factors

estimating dispersions

gene-wise dispersion estimates

mean-dispersion relationship

final dispersion estimates

fitting model and testing




FALSE  TRUE 
12851  8896 

In [7]:
### Generate final table -----
# Reduce the annotated DESeq2 output to the columns reported in the final
# table, give them presentation names and write the table to the Total/
# directory.
Total <- output_flg22

colnames(Total)
# Each column is listed once: gene annotation, fold change and adjusted
# p-value, the DMSO and Z00_flg22 count columns, then the GO column.
Total <- Total %>%
  dplyr::select(Gene, geneabb, genename, log2FoldChange, padj,
                starts_with("DMSO"), starts_with("Z00_flg22"),
                GO)
colnames(Total)

final_colnames <- c("Gene", "Gene_Symbol", "Gene_Name",
                    "Log2FC_flg22", "Padj_flg22",
                    "DMSO_rep1", "DMSO_rep2", "DMSO_rep3",
                    "flg22_rep1", "flg22_rep2", "flg22_rep3",
                    "GO")
# The rename is positional, so stop when the selection did not yield exactly
# the expected number of columns (e.g. a different number of replicates).
stopifnot(ncol(Total) == length(final_colnames))
colnames(Total) <- final_colnames
colnames(Total)

write.csv(Total, file="Total/Total_flg22_leaf(DESeq_DMSO_flg22_only_selected).csv")
