### Analysis of Differentially Expressed Genes between bulk and single-cell datasets
Discover cell types that might be missing in single-cell datasets.

In [1]:
library(edgeR)

Loading required package: limma



In [2]:
# Run-level configuration: the dataset revision being analysed, the edgeR
# input sub-directory, and the project root on disk.
sample_id <- "Endometriosis-revision-3"
data_dir <- "DEG/edgeR-input"
main_dir <- "/projects/robson-lab/research/endometriosis/"

# Relative paths used by the cells below are resolved against the project root.
setwd(main_dir)

In [3]:
# Show the current working directory (set with setwd(main_dir) in the setup cell).
getwd()

**Load Data**
<br>Load matrices of CPM-normalized expression values from the RNA-seq experiments, together with the associated sample metadata.

In [4]:
# The pseudo-bulk (single-cell derived) matrix and the sample metadata are
# specific to `sample_id`; the bulk matrix comes from a fixed, dated file.
sc_matrix <- read.delim(
  paste0("data/bulkRNAseq/", sample_id, "-sc_pseudo-bulk_CPM.txt"),
  sep = "\t"
)
bs_matrix <- read.delim("data/bulkRNAseq/bulk_cpm_20210719.txt", sep = "\t")
metadata <- read.delim(
  paste0("data/bulkRNAseq/", sample_id, "-combined_metadata.txt")
)

In [5]:
# Dimensions (rows, columns) of the pseudo-bulk and bulk expression tables.
dim(sc_matrix)
dim(bs_matrix)

#### DEG on Eutopic tissue vs sc-pseudo-bulk data
**Create Eutopic Matrix**

In [6]:
# Restrict the metadata to eutopic samples (sample types "EuE" and "EuC").
Eutopic <- metadata[metadata$sample_type %in% c("EuE", "EuC"), ]

# Sample IDs must be plain character strings so they can be compared with the
# column names of the expression tables. as.character() is already vectorised,
# so no sapply() is needed, and the result is an unnamed character vector
# whether the column was read as a factor or as character.
Eutopic[["X"]] <- as.character(Eutopic[["X"]])

In [7]:
# Display the metadata rows selected for the eutopic comparison.
Eutopic

Unnamed: 0_level_0,X,method,sample_type,patient_id
Unnamed: 0_level_1,<chr>,<fct>,<fct>,<int>
1,EC19001,single_cell,EuE,4
4,EC19004,single_cell,EuE,5
7,EC19007,single_cell,EuE,6
10,EC19010,single_cell,EuE,8
13,EC19016,single_cell,EuE,10
16,EC19019,single_cell,EuE,11
18,EC20001,single_cell,EuC,795
19,EC20002,single_cell,EuE,13
22,EC20005,single_cell,EuE,14
23,EC20009,single_cell,EuE,15


In [8]:
# From each expression table keep the gene identifier column plus the columns
# whose names are eutopic sample IDs.
sc_Eut <- sc_matrix[, names(sc_matrix) %in% c("index", Eutopic$X)]
bs_Eut <- bs_matrix[, names(bs_matrix) %in% c("gene_name", Eutopic$X)]

# Join the two tables on the gene identifier ("index" in the pseudo-bulk
# table, "gene_name" in the bulk table). merge() keeps only rows whose key is
# present in both tables by default.
eut_matrix <- merge(sc_Eut, bs_Eut, by.x = "index", by.y = "gene_name")

dim(eut_matrix)

**Run edgeR**

In [10]:
# Build the group factor in the same order as the sample columns of
# eut_matrix. Those columns are the pseudo-bulk samples followed by the bulk
# samples (merge order), which is not guaranteed to match the row order of the
# metadata table, so each column is looked up by its sample ID.
sample_rows <- match(setdiff(colnames(eut_matrix), "index"), Eutopic$X)
# Every sample column must have a metadata row; fail loudly otherwise.
stopifnot(!anyNA(sample_rows))
groups <- factor(Eutopic$method[sample_rows])

In [11]:
# Inspect the per-sample group labels and the factor levels.
groups

In [14]:
# Differential expression between pseudo-bulk single-cell and bulk samples
# (eutopic tissue) using edgeR's classic exact test.
# NOTE(review): the input tables are read from files named "*_CPM.txt" and
# "bulk_cpm_*.txt"; edgeR's model is designed for raw counts -- confirm that
# passing these values as `counts` is intended.

# Every column except the gene identifier is a sample. Deriving the set from
# the table replaces a hard-coded column range (2:25) that would silently drop
# samples or fail as soon as the cohort size changes.
sample_cols <- setdiff(colnames(eut_matrix), "index")
stopifnot(length(groups) == length(sample_cols))

cds <- DGEList(
  counts = as.matrix(eut_matrix[sample_cols]),
  group = groups,
  genes = eut_matrix$index
)
nc <- cpm(cds, normalized.lib.sizes = FALSE)
cds <- estimateCommonDisp(cds)
cds <- estimateTagwiseDisp(cds)

# The first element of `pair` is the baseline, so positive log fold changes
# mean higher expression in bulk_sequencing than in single_cell.
de.tgw <- exactTest(cds, pair = c("single_cell", "bulk_sequencing"))

# Request all genes so the FDR filter below sees the complete table.
diff.dat <- topTags(de.tgw, n = nrow(cds$counts))
res <- diff.dat$table

file <- paste0(
  "analysis/", sample_id,
  "/bulkRNAseq/DEG-Eutopic-SCvsBS_20220106.csv"
)
#file2 <- paste("analysis/",sample_id,"/bulkRNAseq/DEG-Top50-SCvsBS.csv",sep="")

# Make sure the output directory exists so the write does not fail after the
# analysis has already run.
dir.create(dirname(file), recursive = TRUE, showWarnings = FALSE)

# Keep only genes with FDR < 0.001.
write.csv(res[res$FDR < 0.001, ], file = file)
#write.csv((row.names(res))[0:49],file=file2)

---

#### DEG on Ectopic Peritoneal tissue vs sc-pseudo-bulk data
**Create Ectopic Matrix**

In [15]:
# Restrict the metadata to ectopic peritoneal samples (sample types "EcP" and
# "EcPA").
Ectopic <- metadata[metadata$sample_type %in% c("EcP", "EcPA"), ]

# Sample IDs must be plain character strings so they can be compared with the
# column names of the expression tables. as.character() is already vectorised,
# so no sapply() is needed, and the result is an unnamed character vector
# whether the column was read as a factor or as character.
Ectopic[["X"]] <- as.character(Ectopic[["X"]])

In [16]:
# From each expression table keep the gene identifier column plus the columns
# whose names are ectopic peritoneal sample IDs.
sc_Ect <- sc_matrix[, names(sc_matrix) %in% c("index", Ectopic$X)]
bs_Ect <- bs_matrix[, names(bs_matrix) %in% c("gene_name", Ectopic$X)]

# Join the two tables on the gene identifier ("index" in the pseudo-bulk
# table, "gene_name" in the bulk table). merge() keeps only rows whose key is
# present in both tables by default.
ect_matrix <- merge(sc_Ect, bs_Ect, by.x = "index", by.y = "gene_name")

dim(ect_matrix)

**Run edgeR**

In [17]:
# Differential expression between pseudo-bulk single-cell and bulk samples
# (ectopic peritoneal tissue) using edgeR's classic exact test.
# NOTE(review): the input tables are read from files named "*_CPM.txt" and
# "bulk_cpm_*.txt"; edgeR's model is designed for raw counts -- confirm that
# passing these values as `counts` is intended.

# Every column except the gene identifier is a sample. Deriving the set from
# the table replaces a hard-coded column range (2:22) that would silently drop
# samples or fail as soon as the cohort size changes.
sample_cols <- setdiff(colnames(ect_matrix), "index")

# Look up each sample column's method by sample ID so the group labels follow
# the column order of the count matrix (pseudo-bulk columns, then bulk
# columns) rather than the row order of the metadata table.
sample_rows <- match(sample_cols, Ectopic$X)
stopifnot(!anyNA(sample_rows))
groups <- factor(Ectopic$method[sample_rows])

cds <- DGEList(
  counts = as.matrix(ect_matrix[sample_cols]),
  group = groups,
  genes = ect_matrix$index
)
nc <- cpm(cds, normalized.lib.sizes = FALSE)
cds <- estimateCommonDisp(cds)
cds <- estimateTagwiseDisp(cds)

# The first element of `pair` is the baseline, so positive log fold changes
# mean higher expression in bulk_sequencing than in single_cell.
de.tgw <- exactTest(cds, pair = c("single_cell", "bulk_sequencing"))

# Request all genes so the FDR filter below sees the complete table.
diff.dat <- topTags(de.tgw, n = nrow(cds$counts))
res <- diff.dat$table

file <- paste0(
  "analysis/", sample_id,
  "/bulkRNAseq/DEG-Ectopic-SCvsBS_20220106.csv"
)
#file2 <- paste("analysis/",sample_id,"/bulkRNAseq/DEG-Top50-SCvsBS.csv",sep="")

# Make sure the output directory exists so the write does not fail after the
# analysis has already run.
dir.create(dirname(file), recursive = TRUE, showWarnings = FALSE)

# Keep only genes with FDR < 0.001.
write.csv(res[res$FDR < 0.001, ], file = file)
#write.csv((row.names(res))[0:49],file=file2)

---

#### DEG on Ectopic Ovary vs sc-pseudo-bulk data
**Create Ectopic Ovary Matrix**

In [18]:
# Restrict the metadata to ectopic ovary samples (sample type "EcO").
Ovary <- metadata[metadata$sample_type %in% c("EcO"), ]

# Sample IDs must be plain character strings so they can be compared with the
# column names of the expression tables. as.character() is already vectorised,
# so no sapply() is needed, and the result is an unnamed character vector
# whether the column was read as a factor or as character.
Ovary[["X"]] <- as.character(Ovary[["X"]])

In [19]:
# From each expression table keep the gene identifier column plus the columns
# whose names are ectopic ovary sample IDs. The temporaries reuse (and
# overwrite) the sc_Ect / bs_Ect names from the peritoneal section.
sc_Ect <- sc_matrix[, names(sc_matrix) %in% c("index", Ovary$X)]
bs_Ect <- bs_matrix[, names(bs_matrix) %in% c("gene_name", Ovary$X)]

# Join the two tables on the gene identifier ("index" in the pseudo-bulk
# table, "gene_name" in the bulk table). merge() keeps only rows whose key is
# present in both tables by default.
ovr_matrix <- merge(sc_Ect, bs_Ect, by.x = "index", by.y = "gene_name")

dim(ovr_matrix)

**Run edgeR**

In [20]:
# Differential expression between pseudo-bulk single-cell and bulk samples
# (ectopic ovary tissue) using edgeR's classic exact test.
# NOTE(review): the input tables are read from files named "*_CPM.txt" and
# "bulk_cpm_*.txt"; edgeR's model is designed for raw counts -- confirm that
# passing these values as `counts` is intended.

# Every column except the gene identifier is a sample. Deriving the set from
# the table replaces a hard-coded column range (2:11) that would silently drop
# samples or fail as soon as the cohort size changes.
sample_cols <- setdiff(colnames(ovr_matrix), "index")

# Look up each sample column's method by sample ID so the group labels follow
# the column order of the count matrix (pseudo-bulk columns, then bulk
# columns) rather than the row order of the metadata table.
sample_rows <- match(sample_cols, Ovary$X)
stopifnot(!anyNA(sample_rows))
groups <- factor(Ovary$method[sample_rows])

cds <- DGEList(
  counts = as.matrix(ovr_matrix[sample_cols]),
  group = groups,
  genes = ovr_matrix$index
)
nc <- cpm(cds, normalized.lib.sizes = FALSE)
cds <- estimateCommonDisp(cds)
cds <- estimateTagwiseDisp(cds)

# The first element of `pair` is the baseline, so positive log fold changes
# mean higher expression in bulk_sequencing than in single_cell.
de.tgw <- exactTest(cds, pair = c("single_cell", "bulk_sequencing"))

# Request all genes so the FDR filter below sees the complete table.
diff.dat <- topTags(de.tgw, n = nrow(cds$counts))
res <- diff.dat$table

file <- paste0(
  "analysis/", sample_id,
  "/bulkRNAseq/DEG-Ovary-SCvsBS_20220106.csv"
)
#file2 <- paste("analysis/",sample_id,"/bulkRNAseq/DEG-Top50-SCvsBS.csv",sep="")

# Make sure the output directory exists so the write does not fail after the
# analysis has already run.
dir.create(dirname(file), recursive = TRUE, showWarnings = FALSE)

# Keep only genes with FDR < 0.001.
write.csv(res[res$FDR < 0.001, ], file = file)
#write.csv((row.names(res))[0:49],file=file2)

---