In [1]:
library(tidyverse)
library(edgeR)
library(stringr)

── [1mAttaching packages[22m ──────────────────────────────────────────────────────────────────────────────────── tidyverse 1.3.2 ──
[32m✔[39m [34mggplot2[39m 3.4.1     [32m✔[39m [34mpurrr  [39m 1.0.1
[32m✔[39m [34mtibble [39m 3.1.8     [32m✔[39m [34mdplyr  [39m 1.1.0
[32m✔[39m [34mtidyr  [39m 1.3.0     [32m✔[39m [34mstringr[39m 1.5.0
[32m✔[39m [34mreadr  [39m 2.1.4     [32m✔[39m [34mforcats[39m 1.0.0
── [1mConflicts[22m ─────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
Loading required package: limma



In [3]:
# Every relative path used in this notebook is resolved from this directory.
setwd("/data/jlu/Rhbdf2_Analysis/notebooks/")

# Gene annotation lookup: columns 2, 3 and 5 of the file are kept and
# renamed to ensembl_gene_id / symbol / name.
geneID_info <- read.csv("./preprocessing/eID_sym_name.csv", header = TRUE)
geneID_info <- geneID_info[, c(2, 3, 5)]
colnames(geneID_info) <- c("ensembl_gene_id", "symbol", "name")

# Some gene symbols are matched to multiple Ensembl IDs; keep only the first
# row seen for each symbol.
geneID_info <- geneID_info[!duplicated(geneID_info$symbol), ]

In [7]:
# Load the compiled expected-counts table; the first CSV column becomes the
# row names.
Blobel_expectedCountsTable <- read.csv(
    './preprocessing/Blobel_counts_CompiledExpectedCounts_allGene.csv',
    row.names = 1
)

# Sample sheet for a paired 3-vs-3 design: Subject A/B/C appears once under
# each Treatment level (0 and 1). FileName uses the same labels as the columns
# of the per-experiment counts table (No1..No3, St1..St3), in the same order.
meta_ST3 <- data.frame(
    FileName = c("No1", "No2", "No3", "St1", "St2", "St3"),
    Subject = c("A", "B", "C", "A", "B", "C"),
    Treatment = c(0, 0, 0, 1, 1, 1)
)

In [33]:
# Build the counts table for this experiment: the three WT columns become
# No1..No3 and the three KO columns become St1..St3, then the expected counts
# are rounded to whole numbers. Row names are carried over from the source table.
Blobel_countsTable_WTvsiRhom2 <- local({
    expected <- Blobel_expectedCountsTable
    unrounded <- data.frame(
        No1 = expected$WT1_S1_L001,
        No2 = expected$WT2_S2_L001,
        No3 = expected$WT3_S3_L001,
        St1 = expected$KO1_S4_L001,
        St2 = expected$KO2_S5_L001,
        St3 = expected$KO3_S6_L001,
        row.names = rownames(expected)
    )
    round(unrounded)
})


In [5]:
# Define DE functions
#' Paired differential expression via an edgeR GLM likelihood-ratio test.
#'
#' Fits the model `~ Subject + Treat` and tests the last design coefficient
#' (the Treat term), then attaches the input counts to the result table.
#'
#' @param targets Data frame with `Treatment` and `Subject` columns, one row
#'   per column of `countsTable`. Rows are assumed to be in the same order as
#'   the count columns; only the number of samples is checked here.
#' @param countsTable Count table with genes as rows (row names are the gene
#'   IDs) and samples as columns.
#' @return Data frame with the gene ID in `ensembl_gene_id`, followed by the
#'   `topTags` statistics for every gene and the per-sample counts.
DiffExp <- function (targets, countsTable) {
    # Fail early with a readable message instead of a design/count dimension
    # error from deep inside the edgeR calls.
    if (nrow(targets) != ncol(countsTable)) {
        stop("targets must have one row per column of countsTable (",
             nrow(targets), " rows vs ", ncol(countsTable), " columns)",
             call. = FALSE)
    }

    Treat <- factor(targets$Treatment)
    Subject <- factor(targets$Subject)
    design <- model.matrix(~ Subject + Treat)

    # NOTE(review): calcNormFactors() is never called, so the DGEList keeps
    # the normalization factors DGEList() assigns by default -- confirm that
    # skipping TMM normalization is intended.
    e.litter <- DGEList(counts = countsTable)
    e.litter <- estimateGLMCommonDisp(e.litter, design)
    e.litter <- estimateGLMTrendedDisp(e.litter, design)
    e.litter <- estimateGLMTagwiseDisp(e.litter, design)

    fit <- glmFit(e.litter, design)
    # The Treat term is the last column of the design; name it explicitly
    # rather than relying on glmLRT's default coefficient.
    lrt <- glmLRT(fit, coef = ncol(design))
    diff <- topTags(lrt, n = nrow(lrt))$table

    # by = 0 joins on row names (gene IDs); sort = FALSE avoids re-sorting the
    # merged table by gene ID.
    result <- merge(diff, countsTable, by = 0, sort = FALSE)
    colnames(result)[1] <- "ensembl_gene_id"
    result
}

#' Run DiffExp() and annotate every gene with its symbol and name.
#'
#' @param targets Sample metadata, passed through to `DiffExp()`.
#' @param countsTable Count table, passed through to `DiffExp()`.
#' @param gene_info Annotation table with an `ensembl_gene_id` column to join
#'   on. Defaults to the notebook-level `geneID_info`, so existing
#'   two-argument calls behave as before.
#' @return The `DiffExp()` result left-joined with `gene_info`; genes without
#'   an annotation row are kept with `NA` in the annotation columns.
DiffExp_compile <- function(targets, countsTable, gene_info = geneID_info) {
    allGene_DE <- DiffExp(targets, countsTable)
    left_join(allGene_DE, gene_info, by = "ensembl_gene_id")
}



In [None]:
# Run the paired comparison on the WT/KO counts table and save the annotated
# result for all genes.
DE_WTvsiRhom2 <- DiffExp_compile(meta_ST3, Blobel_countsTable_WTvsiRhom2)

# Create the output directory if it is missing, so the write cannot fail on a
# missing folder after the model has already been fitted.
dir.create("../DE_out", showWarnings = FALSE, recursive = TRUE)
write.csv(DE_WTvsiRhom2, "../DE_out/DE_allgene_WTvsiRhom2.csv")

In [42]:
# Keep only the rows whose gene symbol starts with "Olfr" and recompute the FDR
# from the p-values of that subset alone.
# Symbols containing "-ps" are kept: the filter that would drop them
# (df[-grep("-ps", df$symbol),]) stays disabled.
subset_olfr <- function(df){
    is_olfr <- grepl("^Olfr", df$symbol)
    olfr_rows <- df[is_olfr, ]
    olfr_rows$FDR <- p.adjust(olfr_rows$PValue, method='fdr')
    olfr_rows
}

# Read the all-gene DE table back from disk (first CSV column holds the row
# names written by write.csv), restrict it to Olfr genes with a re-adjusted
# FDR, and save the subset.
de_df <- read.csv('../DE_out/DE_allgene_WTvsiRhom2.csv',
                  header = TRUE, row.names = 1)
olfrOnly_df <- subset_olfr(de_df)
write.csv(olfrOnly_df, "../DE_out/DE_Olfr_WTvsiRhom2.csv")

In [40]:
# Print the R version, platform and attached package versions for this run.
sessionInfo()

R version 4.2.2 Patched (2022-11-10 r83330)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.5 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] edgeR_3.40.2    limma_3.54.1    forcats_1.0.0   stringr_1.5.0  
 [5] dplyr_1.1.0     purrr_1.0.1     readr_2.1.4     tidyr_1.3.0    
 [9] tibble_3.1.8    ggplot2_3.4.1   tidyverse_1.3.2

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.10  