## Gene differential expression analysis pipeline for **RNAseq** data using the **DESeq2** package

#Load Library

In [None]:
# DESeq2 is distributed through Bioconductor (not CRAN), so it is installed
# with BiocManager. Package names are case-sensitive: the package is "DESeq2".
# Installation is skipped when the package is already available.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("DESeq2", quietly = TRUE)) {
  BiocManager::install("DESeq2")
}
library("DESeq2")

#Data Loading and Preprocessing

In [None]:
# Gene expression counts: the first CSV column supplies the row names and the
# table is then coerced to a matrix.
data <- as.matrix(
  read.csv(file = "data/Gene_counts.csv", row.names = 1)
)

# Phenotype table: the first CSV column supplies the row names.
pheno <- read.csv(file = "LIHC/phenotype1.csv", row.names = 1)

# Count how many entries fall into each value of the Pheno column.
table(pheno$Pheno)



In [None]:
# Dimensions of the count matrix (rows x columns).
dim(data)

# Distribution of the raw values, with 100 breaks requested.
hist(data, breaks = 100, main = "Histogram", col = "orange")

# Same distribution after a log2(x + 1) transform; the +1 keeps zeros finite.
hist(log2(data + 1), main = "Histogram", col = "orange")




In [None]:
# Keep the gene identifiers (row names of the count matrix).
genes <- row.names(data)

# Convert the values to integers. Round first so that fractional values go to
# the nearest integer instead of being truncated toward zero by the integer
# conversion, then switch the storage mode in place: unlike
# apply(data, 2, as.integer), this keeps the matrix shape and its dimnames.
data <- round(data)
storage.mode(data) <- "integer"

# Re-attach the gene identifiers (a safeguard; they are normally still there).
row.names(data) <- genes

# Preview the first rows, labelled with their gene identifiers.
head(data)


#Gene Differential Expression Analysis 

In [None]:
# Labels of the two phenotype groups to compare.
cond1 <- "Highlyexpresed"
cond2 <- "NotAltered"

# Build the DESeq2 dataset from the count matrix and the phenotype table,
# modelling the counts as a function of the Pheno column.
dds <- DESeqDataSetFromMatrix(
  countData = data,
  colData = pheno,
  design = ~ Pheno
)

# Run the DESeq2 workflow; the fitted object is stored separately in dds.run.
dds.run <- DESeq(object = dds)

In [None]:
# Extract the results table from the fitted object returned by DESeq()
# (dds.run); the unfitted dds holds no results. The contrast names the design
# variable (Pheno, from design = ~ Pheno) followed by the numerator and
# denominator levels, so log2 fold changes are cond1 relative to cond2.
res <- results(dds.run, contrast = c("Pheno", cond1, cond2))

# Convert to a plain data frame and keep only the rows with no NA value.
res <- as.data.frame(res)
res <- res[complete.cases(res), ]


In [None]:
# Select the statistically significant differentially expressed genes (DEGs):
# adjusted p-value below 0.01 and absolute log2 fold change above 2.
deseq3.deg <- res[res$padj < 0.01 & abs(res$log2FoldChange) > 2, ]

# List the coefficient names of the fitted model. This queries the object
# returned by DESeq() (dds.run), because the unfitted dds has none.
resultsNames(dds.run)

# Export the DEGs to the current working directory for further analysis.
write.csv(as.matrix(deseq3.deg), file = "deseq33.deg.csv",
          quote = FALSE, row.names = TRUE)
