In [None]:
library(tidyverse)
library(edgeR)
library(stringr)

In [54]:
# NOTE(review): setwd() with an absolute path ties this notebook to one machine.
# It is kept because the relative paths used in this notebook are resolved
# against this directory.
setwd("/data/jlu/Rhbdf2_Analysis/notebooks/")

# Gene annotation table: columns 2, 3 and 5 of the file are kept and renamed to
# ensembl_gene_id, symbol and name.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
geneID_info <- read.delim("./preprocessing/eID_sym_name.csv", header = TRUE, sep = ",")
geneID_info <- geneID_info[, c(2, 3, 5)]
colnames(geneID_info) <- c("ensembl_gene_id", "symbol", "name")

# Keep only the first row for each gene symbol, since some gene symbols are
# matched to multiple Ensembl IDs.
geneID_info <- geneID_info[!duplicated(geneID_info$symbol), ]

In [None]:
# Load the expected-counts table; the first column of the CSV supplies the
# row names.
Blobel_expectedCountsTable <- read.csv(
    "./preprocessing/Blobel_counts_CompiledExpectedCounts_allGene.csv",
    row.names = 1
)

# Sample sheet: six samples, subjects A-C each appearing once with
# Treatment 0 and once with Treatment 1.
# NOTE(review): FileName uses "ST1".."ST3", while the count table built for
# this experiment names its columns "St1".."St3" -- confirm which spelling is
# intended before using FileName as a key.
meta_ST3 <- data.frame(
    FileName = c(paste0("No", 1:3), paste0("ST", 1:3)),
    Subject = rep(c("A", "B", "C"), times = 2),
    Treatment = rep(c(0, 1), each = 3)
)

In [33]:
# Generate the count table for an individual experiment (WT vs KO columns),
# rounding the expected counts to whole numbers.
# Columns are selected with `[` by exact name so that a missing or misspelled
# sample column raises an error. With `$`, a missing column yields NULL (or a
# partial match to another column) and data.frame() silently drops that sample.
Blobel_countsTable_WTvsiRhom2 <- round(setNames(
    Blobel_expectedCountsTable[, c(
        "WT1_S1_L001", "WT2_S2_L001", "WT3_S3_L001",
        "KO1_S4_L001", "KO2_S5_L001", "KO3_S6_L001"
    )],
    c("No1", "No2", "No3", "St1", "St2", "St3")
))


In [65]:
# Define DE functions

#' Differential expression with edgeR's GLM likelihood-ratio test.
#'
#' Fits the additive model `~ Subject + Treat` and tests the last design
#' coefficient (the final `Treat` level), so subject is handled as a blocking
#' factor.
#'
#' @param targets Data frame with one row per sample, in the same order as the
#'   columns of `countsTable`. Must contain `Subject` and `Treatment` columns.
#' @param countsTable Count table with one column per sample; its row names
#'   become the `ensembl_gene_id` column of the result.
#' @param normalize If `TRUE`, scaling factors are computed with
#'   `calcNormFactors()` before dispersion estimation. Defaults to `FALSE`,
#'   which reproduces the original behaviour (no normalisation factors).
#' @return Data frame with the `topTags()` statistics for every row of the
#'   fit, merged with the input counts; the first column is `ensembl_gene_id`.
DiffExp <- function(targets, countsTable, normalize = FALSE) {
    # Rows of the design are matched to count columns by position, so a size
    # mismatch must be caught before any model is fitted.
    if (!all(c("Subject", "Treatment") %in% colnames(targets))) {
        stop("targets must contain 'Subject' and 'Treatment' columns", call. = FALSE)
    }
    if (nrow(targets) != ncol(countsTable)) {
        stop("targets must have one row per column of countsTable", call. = FALSE)
    }

    Treat <- factor(targets$Treatment)
    Subject <- factor(targets$Subject)
    design <- model.matrix(~ Subject + Treat)

    dge <- DGEList(counts = countsTable)
    if (isTRUE(normalize)) {
        dge <- calcNormFactors(dge)
    }
    dge <- estimateGLMCommonDisp(dge, design)
    dge <- estimateGLMTrendedDisp(dge, design)
    dge <- estimateGLMTagwiseDisp(dge, design)

    fit <- glmFit(dge, design)
    # Same coefficient glmLRT() tests by default; written out so the tested
    # term is visible.
    lrt <- glmLRT(fit, coef = ncol(design))
    de_table <- topTags(lrt, n = nrow(lrt))$table

    # by = 0 merges on row names; the resulting key column comes first.
    result <- merge(de_table, countsTable, by = 0, sort = FALSE)
    colnames(result)[1] <- "ensembl_gene_id"
    result
}

#' Run DiffExp() and attach gene annotation to the result.
#'
#' @param targets Sample sheet passed through to `DiffExp()`.
#' @param countsTable Count table passed through to `DiffExp()`.
#' @param gene_info Annotation table joined on `ensembl_gene_id`. Defaults to
#'   the notebook-level `geneID_info`, so existing two-argument calls behave
#'   as before.
#' @return The `DiffExp()` result with the columns of `gene_info` added by a
#'   left join; rows without a matching annotation are kept.
DiffExp_compile <- function(targets, countsTable, gene_info = geneID_info) {
    allGene_DE <- DiffExp(targets, countsTable)
    left_join(allGene_DE, gene_info, by = "ensembl_gene_id")
}

In [66]:
# Run the annotated differential-expression analysis for this experiment.
DE_WTvsiRhom2 <- DiffExp_compile(meta_ST3, Blobel_countsTable_WTvsiRhom2)

# Create the output directory if needed, so the write cannot fail on a missing
# folder after the analysis has already run.
de_output_file <- "../DE_out/DE_WTvsiRhom2.csv"
dir.create(dirname(de_output_file), showWarnings = FALSE, recursive = TRUE)
write.csv(DE_WTvsiRhom2, de_output_file)