# scRNA-Seq analysis of `Osteil` data (3 donors + 3 time points) and annotating with `Tyser` data


 ## `** Start Part 6 **`
 

In [None]:
# Set the session time zone to Sydney, then print the current time as a
# run timestamp.
Sys.setenv(TZ = "Australia/Sydney")
Sys.time()

In [None]:
suppressPackageStartupMessages({
    library(scMerge)
    library(SingleCellExperiment)
    library(scater)
    library(scran)
    library(dplyr)
    library(ggpubr)
    library(forcats)
    library(tidyr)
    library(data.table)
    library(Seurat)
    library(princurve)
    library(slingshot)
    library(UpSetR)
    library(ComplexHeatmap)
    library(biomaRt)
    library(BiocParallel)
    library(edgeR)
    library(SC3)
    library(scDblFinder)
    library(here)
    library(clustifyr)
    library(ILoReg)
    library(cowplot)
    library(Matrix)
    library(janitor)
    library(Cepo)
    library(ggrepel)
    library(dittoSeq)
    library(readxl)
    library(future)
    library(fs)
    library(parallel)
})

In [None]:
# ---- Project locations ----
# Every directory string ends in "/", so file names are appended with paste0().
baseDir <- "/home/datascience/20201211_Pierre_scRNA_hiPSc_EMB_01/"
set <- "20221104_Pierre_hiPSc"
inputDir <- paste0(baseDir, "counts/")
outDir <- paste0(baseDir, set, "_output/")
# Create the output folder; warnings (e.g. "already exists") are muted.
suppressWarnings(dir.create(paste0(baseDir, set, "_output")))
cat(paste0("\nbaseDir = ", baseDir))
cat(paste0("\ninputDir = ", inputDir))
cat(paste0("\noutDir = ", outDir))

# ---- Parallel back-ends ----
# Use every detected core for both BiocParallel and future.
ncpu <- parallel::detectCores()
cores <- BiocParallel::MulticoreParam(workers = ncpu, progressbar = TRUE)
plan("multicore", workers = ncpu)
# plan()
# Raise the size limit for globals exported to future workers.
options(future.globals.maxSize = 200 * 1000 * 1024^2)
cat(paste0("\nset name = ", set))

# ---- Analysis parameters ----
# NOTE(review): the legend says 2 = mouse, but the gene patterns below and the
# marker symbols used in this notebook look human - confirm the intended value.
species <- 2 #  human = 1 and mouse = 2
percent.mt_cutoff <- 20
nFeature_RNA_cutoff <- 200
nCount_RNA_DOWNcutoff <- 1000
nCount_RNA_UPcutoff <- 25000
rbio_pattern <- "^RP(S|L)[0-9]" # ribosomal genes, human pattern (mouse pattern never filled in)
mito_pattern <- "^MT-" # mitochondrial genes, human pattern (mouse pattern never filled in)
slot <- "data"
test <- "wilcox"

## Project description:

In this script, we use 10x data (2 samples) as input and filter them.

Samples are:


In [None]:
# Print the full directory tree below the input (counts) folder.
fs::dir_tree(path = inputDir, recurse = TRUE)

## ggplot themes

The parameters for ggplot are set here.

In [None]:
# Shared ggplot2 styling. The two themes differ only in the rotation of the
# x-axis tick labels (0 degrees for `ggtheme_hx`, 90 degrees for `ggtheme_vx`),
# so both are produced by one small builder. Each theme is wrapped in a list
# so it can be added to a plot with `+`.
.build_axis_theme <- function(x_angle) {
  bold_tick_y <- element_text(angle = 0, vjust = 0.5, size = 12, face = "bold")
  bold_legend <- element_text(colour = "black", size = 12, face = "bold")
  list(theme(
    axis.text.x = element_text(angle = x_angle, vjust = 0.5,
                               size = 12, face = "bold"),
    axis.text.y = bold_tick_y,
    axis.title = element_text(size = 14, face = "bold"),
    legend.title = bold_legend,
    legend.text = bold_legend,
    strip.text = element_text(size = 20)
  ))
}
ggtheme_hx <- .build_axis_theme(0)
ggtheme_vx <- .build_axis_theme(90)

# Importing data


## Import sce object

In [None]:
# Load the merged, annotated object from the output folder and print its
# summary.
sce <- readRDS(paste0(outDir, set, "_sce_merged_anno2.RDS"))
sce

In [None]:
# Peek at the first two rows of the per-cell metadata.
colData(sce)[1:2,]

In [None]:
# Dimensions of the counts matrix held in the alternative experiment.
counts(altExp(sce)) %>% dim()

In [None]:
# Dimensions of the counts matrix of the main experiment, for comparison.
counts(sce) %>% dim()

In [None]:
# Show the first few feature names of the main counts matrix.
rownames(counts(sce)) %>% head()

In [None]:
# Replace the main counts with the altExp counts, restricted to (and ordered
# by) the features of the main experiment. Indexing is by row name, so every
# main-experiment feature must also exist in the altExp.
counts(sce) <- counts(altExp(sce))[rownames(counts(sce)), ]

In [None]:
# Keep the current "logcounts" values under the name "integrated" before
# "logcounts" is overwritten in the next cell.
assay(sce, "integrated") <- assay(sce, "logcounts")

In [None]:
# Overwrite "logcounts" with the altExp logcounts for the same features.
# Must run after the copy to "integrated" above, otherwise those values are lost.
assay(sce, "logcounts") <- logcounts(altExp(sce))[rownames(counts(sce)), ]

In [None]:
# Print the object summary to check the assays after the replacements.
sce

In [None]:
# Remove the alternative experiment now that its counts/logcounts were copied
# into the main experiment, then print the object.
altExp(sce) <- NULL
sce

In [None]:
# Find if a gene is in the list of genes in your sce object:
# x=toupper("Mgarp")
# x
# grep(paste0("^", x, "$"), rownames(sce), value = TRUE)

## `Genes of interest`

In [None]:
# Marker gene panels. Every variable whose name starts with "Markers_" is
# picked up by name later on (via ls(pattern = "Markers_.*")) and merged into
# one list, so new panels only need to follow this naming scheme.
# The panels overlap; duplicates are removed after merging.
Markers_geneList1 <- c( "PITX2", "NODAL", "GBX2", "DPPA2", "CD24", "CHGA", "HESX1", "OTX2", "SFRP1")
Markers_geneList2 <- c("DKK1", "DKK4", "APLNR", "CER1", "LHX1", "MESP1", "FGF17", "RSPO3", 
      "TBX6", "GATA6", "T", "SNAI1", "OTX2", "CDH2", "VIM", "MIXL1", "GSC", 
      "SP5", "LEF1", "ZIC1", "NANOG", "CDH11", "DPPA2", "TCF7L1", "CD24")
Markers_geneList3 <- c("CD24", "NANOG", "ZIC1", "CDH1", "T", "SOX2", "EPCAM", "RBM47", "GBX2", 
      "DPPA2", "TWIST2", "MIXL1", "VIM", "LGR5", "SOX17", "GATA6", "OTX2")
Markers_geneList4 <- c("CD24", "NANOG", "CDH1", "SOX2", "PDGFRA", "EPCAM", "T", "KDR", "CXCR4", 
      "FOXA2", "DKK1", "FRZB", "PORCN", "LGR5", "VIM", "SOX17", "CER1", "CDH2", "OTX2")
# Written in mixed case; all symbols are upper-cased after merging.
Markers_geneList5 <- c("Pou5f1", "Lefty1", "Lefty2", "Tbxt")

In [None]:
# List the names of all marker-panel variables currently defined.
ls(pattern = "Markers_.*")
# c(Markers_Adrenal_medulla, Markers_Bcells, Markers_Capsular_stem_cells) %>% unique()

In [None]:
# Build, as text, the R expression that concatenates every `Markers_*` vector
# into `markers_all`. The text is printed here and evaluated in the next cell.
markers_all <- NULL
markers <- ls(pattern = "Markers_.*")
# Join the variable names in one step. Unlike the former `1:length(markers)`
# loop followed by a regex clean-up, this also yields valid code
# ("markers_all <- c()") when no marker vector is defined.
hh <- paste0("markers_all <- c(", paste(markers, collapse = ","), ")")
cat(hh)

In [None]:
# Merge all marker panels into one upper-case, de-duplicated, sorted vector.
# mget() fetches the `Markers_*` vectors by name, replacing the former
# eval(parse(text = hh)) of generated code.
markers_all <- unlist(mget(markers), use.names = FALSE)
# Upper-case first, so symbols that differ only in case collapse to a single
# entry and the sort order does not depend on letter case.
markers_all <- markers_all %>% toupper() %>% unique() %>% sort()
cat("All Marker genes:\n\n")
print(markers_all)

In [None]:
# Split the requested markers into those present in and those absent from the
# dataset. Exact matching with %in% replaces the former per-gene regular
# expression, so symbols containing regex metacharacters (e.g. "." or "(")
# can neither mis-match nor raise a regex error.
cat("Name of missing genes in our dataset:\n\n")
is_present <- markers_all %in% rownames(sce)
for (missing_gene in markers_all[!is_present]) {
  print(paste0(missing_gene, " = Not in the list"))
}
# `genes` keeps the order of `markers_all`; it is an empty character vector
# (rather than NULL) when no marker is available.
genes <- markers_all[is_present]
cat("\n\nName of available markers genes in our dataset:\n\n")
print(genes)

In [None]:
# Raw counts of the available marker genes in the first two cells.
counts(sce)[genes, 1:2]

In [None]:
# Log-normalised values of the same genes and cells.
logcounts(sce)[genes, 1:2]

In [None]:
# Values of the same genes and cells in the "integrated" assay.
assay(sce, "integrated")[genes, 1:2]

## `Donors`

In [None]:
# Distinct donor labels, in factor-level order; the plotting cells below
# address donors by position in this vector.
donors <- levels(as.factor(sce$donor))
donors

## `Gene expression - UMAP` for sample:

In [None]:
# UMAP feature plots (assay "counts") of every available marker gene for the
# first donor, shown inline and written to a PDF.
options(repr.plot.width = 25, repr.plot.height = 10)
i <- 1
cat(donors[[i]])
# Subset the donor's cells once instead of once per gene.
sce_donor <- subset(sce, , donor == donors[[i]])
# Messages and warnings raised while the plots are assembled are muted.
suppressMessages(suppressWarnings({
  plots <- lapply(genes, function(x) {
    dittoDimPlot(sce_donor, x, assay = "counts", reduction.use = "UMAP",
                 split.by = NULL,
                 main = paste0(x, " expression in ", donors[[i]]), size = 2) +
      ggtheme_hx +
      scale_color_gradient(name = x, low = "grey", high = "red")
  })
}))

ggpubr::ggarrange(plotlist = plots, common.legend = FALSE, ncol = 3, legend = "right")

pdf(file = paste0(outDir, set, "_Part6_coex_Ploting_genes_UMAP_", donors[[i]], ".pdf"), width = 25, height = 10)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(plotlist = plots, common.legend = FALSE, ncol = 3, legend = "right"))
dev.off()

## `Gene expression - TSNE` for sample:

In [None]:
# options(repr.plot.width=25, repr.plot.height=10)
# i=1
# cat(donors[[i]])
# options(repr.plot.width=25, repr.plot.height=10)
# suppressMessages(suppressWarnings({
# plots <- lapply(genes, function(x){
#     dittoDimPlot(subset(sce, ,donor == donors[[i]]), x, reduction.use = "TSNE", split.by = NULL, main = paste0(x, " expression in ", donors[[i]]), size = 2) + ggtheme_hx+scale_color_gradient(name = x, low="grey", high="red")
# })}))

# ggpubr::ggarrange(plotlist = plots, common.legend = F, ncol=3, legend = "right")

# pdf(file = paste0(outDir, set, "_Part6_coex_Ploting_genes_TSNE_", donors[[i]], ".pdf"), width = 25, height = 10)
# ggpubr::ggarrange(plotlist = plots, common.legend = F, ncol=3, legend = "right")
# dev.off()

## `Gene expression - UMAP` for sample:

In [None]:
# UMAP feature plots (assay "counts") of every available marker gene for the
# second donor, shown inline and written to a PDF.
options(repr.plot.width = 25, repr.plot.height = 10)
i <- 2
cat(donors[[i]])
# Subset the donor's cells once instead of once per gene.
sce_donor <- subset(sce, , donor == donors[[i]])
# Messages and warnings raised while the plots are assembled are muted.
suppressMessages(suppressWarnings({
  plots <- lapply(genes, function(x) {
    dittoDimPlot(sce_donor, x, assay = "counts", reduction.use = "UMAP",
                 split.by = NULL,
                 main = paste0(x, " expression in ", donors[[i]]), size = 2) +
      ggtheme_hx +
      scale_color_gradient(name = x, low = "grey", high = "red")
  })
}))

ggpubr::ggarrange(plotlist = plots, common.legend = FALSE, ncol = 3, legend = "right")

pdf(file = paste0(outDir, set, "_Part6_coex_Ploting_genes_UMAP_", donors[[i]], ".pdf"), width = 25, height = 10)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(plotlist = plots, common.legend = FALSE, ncol = 3, legend = "right"))
dev.off()

## `Gene expression - TSNE` for sample:

In [None]:
# options(repr.plot.width=25, repr.plot.height=10)
# i=2
# cat(donors[[i]])
# options(repr.plot.width=25, repr.plot.height=10)
# suppressMessages(suppressWarnings({
# plots <- lapply(genes, function(x){
#     dittoDimPlot(subset(sce, ,donor == donors[[i]]), x, reduction.use = "TSNE", split.by = NULL, main = paste0(x, " expression in ", donors[[i]]), size = 2) + ggtheme_hx+scale_color_gradient(name = x, low="grey", high="red")
# })}))

# ggpubr::ggarrange(plotlist = plots, common.legend = F, ncol=3, legend = "right")

# pdf(file = paste0(outDir, set, "_Part6_coex_Ploting_genes_TSNE_", donors[[i]], ".pdf"), width = 25, height = 10)
# ggpubr::ggarrange(plotlist = plots, common.legend = F, ncol=3, legend = "right")
# dev.off()

## `Gene expression - UMAP` for sample:

In [None]:
# UMAP feature plots of every available marker gene for the third donor,
# shown inline and written to a PDF.
# NOTE(review): unlike the cells for donors 1 and 2, no `assay` is passed here,
# so dittoDimPlot() falls back to its default assay - confirm this is intended.
options(repr.plot.width = 25, repr.plot.height = 10)
i <- 3
cat(donors[[i]])
# Subset the donor's cells once instead of once per gene.
sce_donor <- subset(sce, , donor == donors[[i]])
# Messages and warnings raised while the plots are assembled are muted.
suppressMessages(suppressWarnings({
  plots <- lapply(genes, function(x) {
    dittoDimPlot(sce_donor, x, reduction.use = "UMAP", split.by = NULL,
                 main = paste0(x, " expression in ", donors[[i]]), size = 2) +
      ggtheme_hx +
      scale_color_gradient(name = x, low = "grey", high = "red")
  })
}))

ggpubr::ggarrange(plotlist = plots, common.legend = FALSE, ncol = 3, legend = "right")

pdf(file = paste0(outDir, set, "_Part6_coex_Ploting_genes_UMAP_", donors[[i]], ".pdf"), width = 25, height = 10)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(plotlist = plots, common.legend = FALSE, ncol = 3, legend = "right"))
dev.off()

## `Gene expression profile` for `seurat_clusters`

In [None]:
# Report how many distinct values `seurat_clusters` has, then cross-tabulate
# cells per cluster and donor for both clusterings: `seurat_clusters` and
# `label` (printed under the heading "scater_clusters").
cat(paste0("\nNumber of seurat_clusters = ", length(table(colData(sce)$seurat_clusters))))
cat("\n\nNumber of cells in each seurat_clusters:\n")
table(colData(sce)$seurat_clusters, colData(sce)$donor)

cat("\n\nNumber of cells in each scater_clusters:\n")
table(colData(sce)$label, colData(sce)$donor)

In [None]:
# Choose the metadata column to group by and derive a display label from it:
# underscores/hyphens become spaces and the result is title-cased, unless
# renaming is switched off.
varName <- "label"

renameVarName <- TRUE
varLabel <- if (renameVarName) {
  tools::toTitleCase(gsub("_|-", " ", varName))
} else {
  varName
}
varLabel

In [None]:
# Per-cluster (`label`) log-expression plots of every available marker gene
# for the first donor, shown inline and written to a tall PDF.
options(repr.plot.width = 15, repr.plot.height = 7)
i <- 1
cat(donors[[i]])
varName <- "label"

# Axis label: the column name with "_"/"-" replaced by spaces, title-cased.
renameVarName <- TRUE
if (renameVarName) {
  varLabel <- gsub("_|-", " ", varName) %>% tools::toTitleCase(.)
} else {
  varLabel <- varName
}

# Subset the donor's cells once instead of once per gene.
sce_donor <- subset(sce, , donor == donors[[i]])
plots <- lapply(genes, function(x) {
  plotExpression(sce_donor,
                 features = x,
                 x = varName, colour_by = varName, xlab = varLabel,
                 exprs_values = "logcounts") + ggtheme_vx
})
ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right")
pdf(file = paste0(outDir, set, "_Part6_plotExpression_", varName, "_", donors[[i]], ".pdf"), width = 15, height = 50)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right"))
dev.off()
cat("The PDF file is saved as: \n", paste0(set, "_Part6_plotExpression_", varName, "_", donors[[i]], ".pdf"))

In [None]:
# Add two combined per-cell labels: donor followed by cell type, and cell type
# followed by donor, each joined with "_".
donor_id <- colData(sce)$donor
atlas_type <- colData(sce)$Atlas_CTG_merged
colData(sce)$donor_Atlas_CTG_merged <- paste(donor_id, atlas_type, sep = "_")
colData(sce)$Atlas_CTG_merged_donor <- paste(atlas_type, donor_id, sep = "_")

In [None]:
# Check that the two combined label columns were added.
colData(sce)[1:2,]

## Gene expression for `donor_Atlas_CTG_merged`

In [None]:
# First two rows of the per-cell metadata.
colData(sce)[1:2, ]

In [None]:
# Number of cells per sample (`orig.ident`).
table(sce$orig.ident)

In [None]:
# Number of cells per cell-type/donor combination.
table(sce$Atlas_CTG_merged_donor)

In [None]:
# Copy the cell-type/donor labels and prefix three lineage names with a digit
# (1Epiblast, 2Nascent, 3Endoderm); presumably this fixes their alphabetical
# order on plot axes - confirm.
lineage_prefix <- c(Epiblast = "1Epiblast", Nascent = "2Nascent", Endoderm = "3Endoderm")
ordered_labels <- sce$Atlas_CTG_merged_donor
for (lineage in names(lineage_prefix)) {
  ordered_labels <- gsub(lineage, lineage_prefix[[lineage]], ordered_labels)
}
sce$Atlas_CTG_merged_donors <- ordered_labels

In [None]:
# Log-expression of every available marker gene across all cells, grouped and
# coloured by sample (`orig.ident`); shown inline and written to a PDF.
options(repr.plot.width = 15, repr.plot.height = 8)
varName <- "orig.ident"

# Display label derived from the column name (not used for the axis below,
# which is left blank).
renameVarName <- TRUE
varLabel <- if (renameVarName) {
  tools::toTitleCase(gsub("_|-", " ", varName))
} else {
  varName
}

plots <- lapply(genes, function(gene) {
  plotExpression(sce, features = gene, x = varName, colour_by = varName,
                 xlab = "", exprs_values = "logcounts") + ggtheme_vx
})
ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right")
pdf(file = paste0(outDir, set, "_Part6_plotExpression_", varName, ".pdf"), width = 15, height = 8)
ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right")
dev.off()
cat("The PDF file is saved as: \n", paste0(set, "_Part6_plotExpression_", varName, ".pdf"))

In [None]:
# Gene panel for the publication figure, in display order (4 per row in the
# next plot). Unlike `genes`, this vector has not been checked against the
# features of `sce`.
gene_pub <- c("POU5F1", "NANOG", "SOX2", "CD24",
             "MIXL1", "TBXT", "LHX1", "CER1",
             "DKK1", "DKK4", "LEFTY1", "LEFTY2",
             "FOXA2", "SOX17", "GSC", "GATA6")
gene_pub

In [None]:
# Publication panel: log-expression of the `gene_pub` genes across all cells,
# grouped and coloured by sample (`orig.ident`); shown inline and saved as PDF.
options(repr.plot.width = 24, repr.plot.height = 8)
varName <- "orig.ident"

# Display label derived from the column name (the axis itself is left blank).
renameVarName <- TRUE
if (renameVarName) {
  varLabel <- gsub("_|-", " ", varName) %>% tools::toTitleCase(.)
} else {
  varLabel <- varName
}

# `gene_pub` was never checked against the dataset; plot only the symbols that
# exist so one absent gene does not abort the whole figure, and say which were
# skipped.
missing_pub <- setdiff(gene_pub, rownames(sce))
if (length(missing_pub) > 0) {
  message("Skipping genes absent from the dataset: ",
          paste(missing_pub, collapse = ", "))
}
genes_to_plot <- intersect(gene_pub, rownames(sce))
if (length(genes_to_plot) == 0) {
  stop("None of the genes in 'gene_pub' is present in 'sce'.", call. = FALSE)
}

plots <- lapply(genes_to_plot, function(x) {
  plotExpression(sce,
                 features = x,
                 x = varName, colour_by = varName, xlab = "",
                 exprs_values = "logcounts") + ggtheme_vx
})
ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 4, legend = "right")
pdf(file = paste0(outDir, set, "_Part6_plotExpression_", varName, "_pub.pdf"), width = 24, height = 8)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 4, legend = "right"))
dev.off()
cat("The PDF file is saved as: \n", paste0(outDir, set, "_Part6_plotExpression_", varName, "_pub.pdf"))

In [None]:
# Log-expression of every available marker gene across all cells, grouped and
# coloured by the digit-prefixed cell-type/donor label
# (`Atlas_CTG_merged_donors`); shown inline and written to a PDF.
options(repr.plot.width = 15, repr.plot.height = 8)
varName <- "Atlas_CTG_merged_donors"

# Display label derived from the column name (the axis itself is left blank).
renameVarName <- TRUE
varLabel <- if (renameVarName) {
  tools::toTitleCase(gsub("_|-", " ", varName))
} else {
  varName
}

plots <- lapply(genes, function(gene) {
  plotExpression(sce, features = gene, x = varName, colour_by = varName,
                 xlab = "", exprs_values = "logcounts") + ggtheme_vx
})
ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right")
pdf(file = paste0(outDir, set, "_Part6_plotExpression_", varName, ".pdf"), width = 15, height = 8)
ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right")
dev.off()
cat("The PDF file is saved as: \n", paste0(set, "_Part6_plotExpression_", varName, ".pdf"))

In [None]:
# Log-expression of every available marker gene across all cells, grouped and
# coloured by the plain cell-type/donor label (`Atlas_CTG_merged_donor`);
# shown inline and written to a PDF.
options(repr.plot.width = 15, repr.plot.height = 8)
varName <- "Atlas_CTG_merged_donor"

# Display label derived from the column name (the axis itself is left blank).
renameVarName <- TRUE
varLabel <- if (renameVarName) {
  tools::toTitleCase(gsub("_|-", " ", varName))
} else {
  varName
}

plots <- lapply(genes, function(gene) {
  plotExpression(sce, features = gene, x = varName, colour_by = varName,
                 xlab = "", exprs_values = "logcounts") + ggtheme_vx
})
ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right")
pdf(file = paste0(outDir, set, "_Part6_plotExpression_", varName, ".pdf"), width = 15, height = 8)
ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right")
dev.off()
cat("The PDF file is saved as: \n", paste0(set, "_Part6_plotExpression_", varName, ".pdf"))

## `Gene expression - UMAP` for sample:

In [None]:
# UMAP feature plots of every available marker gene for the second donor,
# shown inline and written to a PDF.
# NOTE(review): this repeats the earlier donor-2 cell without `assay = "counts"`
# and writes to the same file name, so it replaces that earlier PDF - confirm
# which version should be kept.
options(repr.plot.width = 25, repr.plot.height = 10)
i <- 2
cat(donors[[i]])
# Subset the donor's cells once instead of once per gene.
sce_donor <- subset(sce, , donor == donors[[i]])
# Messages and warnings raised while the plots are assembled are muted.
suppressMessages(suppressWarnings({
  plots <- lapply(genes, function(x) {
    dittoDimPlot(sce_donor, x, reduction.use = "UMAP", split.by = NULL,
                 main = paste0(x, " expression in ", donors[[i]]), size = 2) +
      ggtheme_hx +
      scale_color_gradient(name = x, low = "grey", high = "red")
  })
}))

ggpubr::ggarrange(plotlist = plots, common.legend = FALSE, ncol = 3, legend = "right")

pdf(file = paste0(outDir, set, "_Part6_coex_Ploting_genes_UMAP_", donors[[i]], ".pdf"), width = 25, height = 10)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(plotlist = plots, common.legend = FALSE, ncol = 3, legend = "right"))
dev.off()

In [None]:
# Print the object summary.
sce

## `Gene expression profile` for `Atlas_CTG_merged` for sample 1

In [None]:
# Per-cell-type (`Atlas_CTG_merged`) expression plots of every available marker
# gene for the first donor, shown inline and written to a tall PDF.
# Note: this cell plots raw "counts", whereas the per-`label` plot above uses
# "logcounts".
options(repr.plot.width = 15, repr.plot.height = 7)
i <- 1
cat(donors[[i]])
varName <- "Atlas_CTG_merged"

# Axis label: the column name with "_"/"-" replaced by spaces, title-cased.
renameVarName <- TRUE
if (renameVarName) {
  varLabel <- gsub("_|-", " ", varName) %>% tools::toTitleCase(.)
} else {
  varLabel <- varName
}

# Subset the donor's cells once instead of once per gene.
sce_donor <- subset(sce, , donor == donors[[i]])
plots <- lapply(genes, function(x) {
  plotExpression(sce_donor,
                 features = x,
                 x = varName, colour_by = varName, xlab = varLabel,
                 exprs_values = "counts") + ggtheme_vx
})
ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right")
pdf(file = paste0(outDir, set, "_Part6_plotExpression_", varName, "_", donors[[i]], ".pdf"), width = 15, height = 50)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right"))
dev.off()
cat("The PDF file is saved as: \n", paste0(set, "_Part6_plotExpression_", varName, "_", donors[[i]], ".pdf"))

## `Gene expression profile` for `Atlas_CTG_merged_sub` for sample 1

In [None]:
# Per-sub-type (`Atlas_CTG_merged_sub`) expression plots of every available
# marker gene for the first donor, shown inline and written to a tall PDF.
# Note: this cell plots raw "counts", whereas the per-`label` plot above uses
# "logcounts".
options(repr.plot.width = 15, repr.plot.height = 7)
i <- 1
cat(donors[[i]])
varName <- "Atlas_CTG_merged_sub"

# Axis label: the column name with "_"/"-" replaced by spaces, title-cased.
renameVarName <- TRUE
if (renameVarName) {
  varLabel <- gsub("_|-", " ", varName) %>% tools::toTitleCase(.)
} else {
  varLabel <- varName
}

# Subset the donor's cells once instead of once per gene.
sce_donor <- subset(sce, , donor == donors[[i]])
plots <- lapply(genes, function(x) {
  plotExpression(sce_donor,
                 features = x,
                 x = varName, colour_by = varName, xlab = varLabel,
                 exprs_values = "counts") + ggtheme_vx
})
ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right")
pdf(file = paste0(outDir, set, "_Part6_plotExpression_", varName, "_", donors[[i]], ".pdf"), width = 15, height = 50)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(plotlist = plots, common.legend = TRUE, ncol = 2, legend = "right"))
dev.off()
cat("The PDF file is saved as: \n", paste0(set, "_Part6_plotExpression_", varName, "_", donors[[i]], ".pdf"))

In [None]:
# Number of cells per annotated cell type.
table(colData(sce)$Atlas_CTG_merged)

In [None]:
# Number of cells per annotated cell sub-type.
table(colData(sce)$Atlas_CTG_merged_sub)

## Show the `UMAP` and `TSNE` for the new clusters

In [None]:
# Metadata columns to colour the embeddings by; each of the following cells
# draws one plot per entry of `types`.
types <- c("Atlas_CTG_merged", "Atlas_CTG_merged_sub", "donor", "label", "day", "orig.ident")
cat("Let's show the Figures for the following columns:\n ")
print(types)

In [None]:
# PCA embedding coloured by each metadata column in `types`, drawn with
# half-transparent points and `order = "decreasing"`. The plots are shown
# inline and then printed again into a PDF.
options(repr.plot.width = 10, repr.plot.height = 7)
plots <- lapply(types, function(x) {
  # The stray trailing comma after `order = "decreasing"` was removed: it
  # passed an extra, empty positional argument to dittoDimPlot().
  dittoDimPlot(sce, x,
               reduction.use = "PCA",
               legend.title = x, main = x, opacity = 0.5,
               order = "decreasing") + ggtheme_hx
})
plots
pdf(file = paste0(outDir, set, "_Part6_PCA_types_merged_or_op5.pdf"), width = 10, height = 7)
plots
dev.off()

In [None]:
# PCA embedding coloured by each metadata column in `types` (opaque points,
# `order = "decreasing"`); shown inline, then printed again into a PDF.
options(repr.plot.width = 10, repr.plot.height = 7)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "PCA",
    legend.title = meta_col, main = meta_col,
    order = "decreasing"
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_PCA_types_merged_or.pdf")
pdf(file = pdf_file, width = 10, height = 7)
plots
dev.off()

In [None]:
# UMAP embedding coloured by each metadata column in `types` (half-transparent
# points, `order = "decreasing"`); shown inline, then printed again into a PDF.
options(repr.plot.width = 10, repr.plot.height = 7)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "UMAP",
    legend.title = meta_col, main = meta_col,
    opacity = 0.5, order = "decreasing"
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_UMAP_types_merged_or_op5.pdf")
pdf(file = pdf_file, width = 10, height = 7)
plots
dev.off()

In [None]:
# UMAP embedding coloured by each metadata column in `types` (opaque points,
# `order = "decreasing"`); shown inline, then printed again into a PDF.
options(repr.plot.width = 10, repr.plot.height = 7)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "UMAP",
    legend.title = meta_col, main = meta_col,
    order = "decreasing"
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_UMAP_types_merged_or.pdf")
pdf(file = pdf_file, width = 10, height = 7)
plots
dev.off()

In [None]:
# UMAP embedding coloured by each metadata column in `types`, with group
# labels drawn on the plot (size 5), half-transparent points and
# `order = "decreasing"`; shown inline, then printed again into a PDF.
options(repr.plot.width = 10, repr.plot.height = 7)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "UMAP",
    legend.title = meta_col, main = meta_col,
    do.label = TRUE, labels.size = 5,
    opacity = 0.5, order = "decreasing"
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_UMAP_types_merged_label_op5.pdf")
pdf(file = pdf_file, width = 10, height = 7)
plots
dev.off()

In [None]:
# t-SNE embedding coloured by each metadata column in `types` (half-transparent
# points, `order = "decreasing"`); shown inline, then printed again into a PDF.
options(repr.plot.width = 10, repr.plot.height = 7)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "TSNE",
    legend.title = meta_col, main = meta_col,
    opacity = 0.5, order = "decreasing"
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_TSNE_types_merged_or_op5.pdf")
pdf(file = pdf_file, width = 10, height = 7)
plots
dev.off()

In [None]:
# t-SNE embedding coloured by each metadata column in `types` (opaque points,
# `order = "decreasing"`); shown inline, then printed again into a PDF.
options(repr.plot.width = 10, repr.plot.height = 7)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "TSNE",
    legend.title = meta_col, main = meta_col,
    order = "decreasing"
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_TSNE_types_merged_or.pdf")
pdf(file = pdf_file, width = 10, height = 7)
plots
dev.off()

In [None]:
# t-SNE embedding coloured by each metadata column in `types`, with group
# labels drawn on the plot (size 5), half-transparent points and
# `order = "decreasing"`; shown inline, then printed again into a PDF.
options(repr.plot.width = 10, repr.plot.height = 7)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "TSNE",
    legend.title = meta_col, main = meta_col,
    do.label = TRUE, labels.size = 5,
    opacity = 0.5, order = "decreasing"
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_TSNE_types_merged_label_or_op5.pdf")
pdf(file = pdf_file, width = 10, height = 7)
plots
dev.off()

In [None]:
# t-SNE embedding coloured by each metadata column in `types`, with group
# labels drawn on the plot (size 5), opaque points and `order = "decreasing"`;
# shown inline, then printed again into a PDF.
options(repr.plot.width = 10, repr.plot.height = 7)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "TSNE",
    legend.title = meta_col, main = meta_col,
    do.label = TRUE, labels.size = 5,
    order = "decreasing"
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_TSNE_types_merged_label_or.pdf")
pdf(file = pdf_file, width = 10, height = 7)
plots
dev.off()

In [None]:
# UMAP embedding coloured by each metadata column in `types`, faceted by donor;
# shown inline, then printed again into a PDF (the PDF is 18 wide, the inline
# display 19).
options(repr.plot.width = 19, repr.plot.height = 9)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "UMAP", split.by = "donor",
    legend.title = meta_col, main = meta_col
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_UMAP_types_merged_split.pdf")
pdf(file = pdf_file, width = 18, height = 9)
plots
dev.off()

In [None]:
# t-SNE embedding coloured by each metadata column in `types`, faceted by
# donor; shown inline, then printed again into a PDF (the PDF is 18 wide, the
# inline display 19).
options(repr.plot.width = 19, repr.plot.height = 9)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "TSNE", split.by = "donor",
    legend.title = meta_col, main = meta_col
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_TSNE_types_merged_split.pdf")
pdf(file = pdf_file, width = 18, height = 9)
plots
dev.off()

In [None]:
# t-SNE embedding coloured by each metadata column in `types`, faceted by donor
# and with group labels drawn on the plot (size 5); shown inline, then printed
# again into a PDF (the PDF is 18 wide, the inline display 19).
options(repr.plot.width = 19, repr.plot.height = 9)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "TSNE", split.by = "donor",
    legend.title = meta_col, main = meta_col,
    do.label = TRUE, labels.size = 5
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_TSNE_types_merged_split_label.pdf")
pdf(file = pdf_file, width = 18, height = 9)
plots
dev.off()

In [None]:
# UMAP embedding coloured by each metadata column in `types`, faceted by donor
# and with group labels drawn on the plot (size 5); shown inline, then printed
# again into a PDF (the PDF is 18 wide, the inline display 19).
options(repr.plot.width = 19, repr.plot.height = 9)
plots <- lapply(types, function(meta_col) {
  dittoDimPlot(
    sce, meta_col,
    reduction.use = "UMAP", split.by = "donor",
    legend.title = meta_col, main = meta_col,
    do.label = TRUE, labels.size = 5
  ) + ggtheme_hx
})
plots
pdf_file <- paste0(outDir, set, "_Part6_UMAP_types_merged_split_label.pdf")
pdf(file = pdf_file, width = 18, height = 9)
plots
dev.off()

## Determine cell proportion of annotated cells

In [None]:
# Cell counts per annotated cell type (rows) and sample (columns), with a
# "Total" row and column appended by janitor::adorn_totals().
type_by_sample <- table(colData(sce)$Atlas_CTG_merged, colData(sce)$orig.ident)
celltype_table <- as.data.frame.matrix(type_by_sample) %>%
  tibble::rownames_to_column("type") %>%
  adorn_totals(where = c("col", "row")) %>%
  tibble::column_to_rownames("type")
celltype_table

In [None]:
# Cell counts per annotated cell sub-type (rows) and sample (columns), with a
# "Total" row and column appended by janitor::adorn_totals().
subtype_by_sample <- table(colData(sce)$Atlas_CTG_merged_sub, colData(sce)$orig.ident)
celltype_table <- as.data.frame.matrix(subtype_by_sample) %>%
  tibble::rownames_to_column("type") %>%
  adorn_totals(where = c("col", "row")) %>%
  tibble::column_to_rownames("type")
celltype_table

In [None]:
# Stacked bar charts of the cell-type composition (`Atlas_CTG_merged`) of each
# sample: absolute cell numbers (`a`) above per-sample proportions (`b`).
# Shown inline and written to a PDF.
options(repr.plot.width = 20, repr.plot.height = 7)
suppressMessages(suppressWarnings({
  celltype_counts <- table(colData(sce)$Atlas_CTG_merged, colData(sce)$orig.ident)

  # Long format: one row per cell type / sample pair.
  celltype_stats <- as.data.frame(celltype_counts)
  colnames(celltype_stats) <- c("CellType", "Sample", "CellNumber")
  a <- ggplot(celltype_stats, aes(x = CellNumber, y = Sample)) +
    geom_col(aes(fill = CellType), width = 0.7) +
    theme_bw() + ggtheme_hx +
    xlab("Cell number") + ylab("")

  # Column-wise proportions, so every sample sums to 1. Converting the table
  # with as.data.frame() gives the long format directly; the former melt() on
  # a matrix relied on data.table forwarding to the deprecated reshape2
  # package and could reorder the factor levels relative to plot `a`.
  celltype_prop <- prop.table(celltype_counts, margin = 2)
  meltdf.p <- as.data.frame(as.table(celltype_prop))
  colnames(meltdf.p) <- c("CellType", "Sample", "CellNumber")
  b <- ggplot(meltdf.p, aes(x = CellNumber, y = Sample)) +
    geom_col(aes(fill = CellType), width = 0.7) +
    theme_bw() + ggtheme_hx +
    xlab("Cell Proportion") + ylab("")

  ggpubr::ggarrange(a, b, common.legend = TRUE, ncol = 1, legend = "right")
}))
pdf(file = paste0(outDir, set, "_Part6_cellProportion_Atlas_CTG_merged_orig.ident.pdf"), width = 25, height = 10)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(a, b, common.legend = TRUE, ncol = 1, legend = "right"))
dev.off()

In [None]:
# Stacked bar charts of the cell sub-type composition (`Atlas_CTG_merged_sub`)
# of each sample: absolute cell numbers (`a`) above per-sample proportions
# (`b`). Shown inline and written to a PDF.
options(repr.plot.width = 20, repr.plot.height = 7)
suppressMessages(suppressWarnings({
  celltype_counts <- table(colData(sce)$Atlas_CTG_merged_sub, colData(sce)$orig.ident)

  # Long format: one row per cell sub-type / sample pair.
  celltype_stats <- as.data.frame(celltype_counts)
  colnames(celltype_stats) <- c("CellType", "Sample", "CellNumber")
  a <- ggplot(celltype_stats, aes(x = CellNumber, y = Sample)) +
    geom_col(aes(fill = CellType), width = 0.7) +
    theme_bw() + ggtheme_hx +
    xlab("Cell number") + ylab("")

  # Column-wise proportions, so every sample sums to 1. Converting the table
  # with as.data.frame() gives the long format directly; the former melt() on
  # a matrix relied on data.table forwarding to the deprecated reshape2
  # package and could reorder the factor levels relative to plot `a`.
  celltype_prop <- prop.table(celltype_counts, margin = 2)
  meltdf.p <- as.data.frame(as.table(celltype_prop))
  colnames(meltdf.p) <- c("CellType", "Sample", "CellNumber")
  b <- ggplot(meltdf.p, aes(x = CellNumber, y = Sample)) +
    geom_col(aes(fill = CellType), width = 0.7) +
    theme_bw() + ggtheme_hx +
    xlab("Cell Proportion") + ylab("")

  ggpubr::ggarrange(a, b, common.legend = TRUE, ncol = 1, legend = "right")
}))
pdf(file = paste0(outDir, set, "_Part6_cellProportion_Atlas_CTG_merged_sub_orig.ident.pdf"), width = 25, height = 10)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(a, b, common.legend = TRUE, ncol = 1, legend = "right"))
dev.off()

In [None]:
# Names of per-cell metadata columns matching any of the listed fragments;
# these are the candidates for removal in the next cells.
# Note: the variable name `drop` shadows base::drop() in this session.
drop <- grep("subset|sum|detected|total|Annotation_", colnames(colData(sce)), value = TRUE)
drop

In [None]:
# Generate, as text, the statements that would delete every column listed in
# `drop` from the colData of `sce[[1]]` and `sce[[2]]`. The text is only
# printed here.
# NOTE(review): the generated code indexes `sce` as a list, whereas `sce` is
# used as a single object throughout this notebook - confirm before enabling
# its evaluation in the next cell.
hh <- ""
# Iterate over the names themselves: the former `1:length(drop)` produced
# statements for "NA" when `drop` was empty.
for (col_name in drop) {
  hh <- paste0(hh, 'colData(sce[[1]])[["', col_name, '"]] <- NULL\n')
  hh <- paste0(hh, 'colData(sce[[2]])[["', col_name, '"]] <- NULL\n')
}
cat(hh)

In [None]:
# Evaluation of the generated column-removal code is switched off: the
# condition is the constant FALSE, so nothing is dropped.
if(FALSE){
  eval(parse(text = hh))
}

In [None]:
# First two rows of the per-cell metadata.
colData(sce)[1:2,]

## `Co-expressed` genes

In [None]:
# levels(as.factor(colData(sce)$type))
print(genes)

In [None]:
# Generate, as text, an addPerCellQC() call with one single-gene feature subset
# per entry of `genes`; the text is printed here and evaluated in the next
# cell. When `sce` is a list, the call is wrapped in lapply().
#
# Fixes over the former loop-based builder:
#   * `1:length(genes[-1])` produced broken code for zero or one gene;
#   * the last gene's subset name was not sanitised ("-" -> "_") like the others.
# NOTE(review): later cells look the columns up with the unsanitised symbol
# ("subsets_<gene>_detected"), so a gene containing "-" would still not be
# found there - confirm whether such symbols can occur.

# One `name=grep("^gene$", rownames(<target>))` argument per gene, joined by
# ",\n"; an empty string when there are no genes.
.subset_arg_lines <- function(gene_ids, target) {
  if (length(gene_ids) == 0L) {
    return("")
  }
  paste0("        ", gsub("-", "_", gene_ids),
         '=grep("^', gene_ids, '$", rownames(', target, "))",
         collapse = ",\n")
}

if (is.list(sce)) {
  hh <- paste0("sce <- lapply(sce, function(x){\n",
               "    addPerCellQC(x,\n",
               "    subsets=list(\n",
               .subset_arg_lines(genes, "x"),
               "))})\nsce")
} else {
  hh <- paste0("sce <- addPerCellQC(sce,\n",
               "    subsets=list(\n",
               .subset_arg_lines(genes, "sce"),
               "))\nsce")
}
cat(hh)

In [None]:
# Run whatever code text is currently stored in `hh`. `hh` is reused for
# several generated snippets in this notebook, so this must run directly after
# the cell that builds the addPerCellQC() call.
eval(parse(text = hh))

## `Display the UMAP for all genes` (TRUE/FALSE)

In [None]:
# Marker genes for which a detection column is expected.
print(genes)

In [None]:
# Names of the per-cell metadata columns of the form "subsets_<name>_detected".
subsets <- grep("^subsets_.*_detected", colnames(colData(sce)), value = TRUE)
print(subsets)

In [None]:
# lapply(genes[c(1:2)], function(x){
#     colData(sce)[[paste0("subsets_", x, "_detected")]] <- ifelse(colData(sce)[[paste0("subsets_", x, "_detected")]]==1,TRUE, FALSE)
#     table(colData(sce)[[paste0("subsets_", x, "_detected")]])
# })

In [None]:
# UMAP plots marking, for each marker gene, the cells in which the gene was
# detected (red) versus not detected (grey). Shown inline and written to a PDF.
options(repr.plot.width = 23, repr.plot.height = 7)
# Messages and warnings raised while the plots are assembled are muted.
suppressMessages(suppressWarnings({
  plots <- lapply(genes, function(x) {
    detected_col <- paste0("subsets_", x, "_detected")
    # Turn the per-cell value into a logical flag (TRUE where it equals 1).
    # The assignment only changes this function's local copy of `sce`; the
    # global object keeps its original column.
    colData(sce)[[detected_col]] <- colData(sce)[[detected_col]] == 1
    dittoDimPlot(sce, detected_col, reduction.use = "UMAP", split.by = NULL,
                 main = x, size = 2, opacity = 0.5) +
      ggtheme_hx +
      # Named colours: with unnamed values, a gene detected in every cell
      # (only TRUE present) would have been drawn in grey.
      scale_color_manual(name = x, values = c("FALSE" = "grey", "TRUE" = "red"))
  })
}))
ggpubr::ggarrange(plotlist = plots, common.legend = FALSE, ncol = 3, legend = "right")

pdf(file = paste0(outDir, set, "_Part6_coex_Ploting_genes_detected.pdf"), width = 23, height = 7)
# Explicit print() so the figure reaches the PDF even when this code is not
# run at top level (e.g. via source()).
print(ggpubr::ggarrange(plotlist = plots, common.legend = FALSE, ncol = 3, legend = "right"))
dev.off()

## `Save Annotated sce`

In [None]:
# Print the object summary before saving.
sce

In [None]:
# Overwrites the previously saved sce object: the target is the same file this
# notebook loaded at the start.
# NOTE(review): the saved object no longer has its altExp (removed above) and
# its counts/logcounts were replaced, so re-running this notebook from the top
# would likely fail at the cells that read `altExp(sce)` - consider a new file
# name.
saveRDS(sce, file = paste0(outDir, set, "_sce_merged_anno2.RDS"))
cat("File saved at: \n", paste0(outDir, set, "_sce_merged_anno2.RDS"))
cat("\nTime: \n")
print(Sys.time())

In [None]:
# Read the file back in, replacing the in-memory object with the saved copy.
sce <- readRDS(paste0(outDir, set, "_sce_merged_anno2.RDS"))

## sessionInfo()

In [None]:
# Record the R version and the loaded package versions for reproducibility.
sessionInfo()

 ## `** End Part 6 **`