In [None]:
# load libraries
library(Seurat)
library(dplyr)
library(tidyverse)
library(doParallel)
library(patchwork)
source("/share/home/qlab/projects/qlab_yrs/project_ssc/scTools.R")
# 设定下跑数据的内存
library(future)
plan("multiprocess", workers = 10)
plan()
options(future.globals.maxSize = 20*1000 * 1024^2) 

In [None]:
# Obtain raw data 
SampleInfo <- read_csv("./data/SamFeatureAll.csv")
SampleList <- SampleInfo$SampleID
SampleName <- SampleInfo$SamNum
sample.combined <- NULL
length(SampleList)

for(i in seq_along(SampleList)){
  if(i <= 13){
    print(i)
    RawData <- Read10X(data.dir = paste0("./matrix/SSC-Paper/",SampleList[i]))
    newnames  <-  paste0(SampleName[i],"_",colnames(RawData))
    colnames(RawData)  <- newnames
    empty.out <- emptyDrops(RawData, lower = 200, ignore = 300)
    is.cell <- empty.out$FDR <= 0.01
    sum(is.cell, na.rm=TRUE)
    table(Limited=empty.out$Limited, Significant=is.cell)
    cells.use <- empty.out %>% as.data.frame() %>%
      rownames_to_column(var = "CellName") %>%
      dplyr::filter(FDR < 0.01, !is.na(FDR)) %>%
      pull(CellName)
    Sample<-CreateSeuratObject(counts = RawData[, cells.use], project = SampleList[i])
    if(is.null(sample.combined)) {
      sample.combined <- Sample
    } else {sample.combined <- merge(sample.combined, Sample, add.cell.ids = NULL,merge.data = TRUE, na.rm = TRUE)}
  }else if(i == 14|i == 15){
      print(i)
      RawData <- Read10X_h5(paste0("./matrix/GEX_", SampleList[i], "/raw_feature_bc_matrix.h5"))
      newnames  <-  paste0(SampleName[i],"_",colnames(RawData))
      colnames(RawData)  <- newnames
      empty.out <- emptyDrops(RawData, lower = 200, ignore = 300)
      is.cell <- empty.out$FDR <= 0.01
      sum(is.cell, na.rm=TRUE)
      table(Limited=empty.out$Limited, Significant=is.cell)
      cells.use  <- empty.out %>% 
                    as.data.frame() %>%
                    rownames_to_column(var = "CellName") %>%
                    dplyr::filter(FDR < 0.01, !is.na(FDR)) %>%
                    pull(CellName)
      Sample<-CreateSeuratObject(counts = RawData[, cells.use], project = SampleList[i])
      sample.combined <- merge(sample.combined, Sample, add.cell.ids = NULL,merge.data = TRUE, na.rm = TRUE)
  }else if(i > 15){
      print(i)
      RawData <- Read10X_h5(paste0("./matrix/2018v2/", SampleList[i], "_raw_matrices.h5"))
      newnames <- paste0(SampleName[i],"_",colnames(RawData))
      colnames(RawData) <- newnames
      empty.out <- emptyDrops(RawData, lower = 200, ignore = 300)
      is.cell <- empty.out$FDR <= 0.01
      sum(is.cell, na.rm=TRUE)
      table(Limited = empty.out$Limited, Significant = is.cell)
      cells.use <- empty.out %>% as.data.frame() %>%
      rownames_to_column(var = "CellName") %>%
      dplyr::filter(FDR < 0.01, !is.na(FDR)) %>%
      pull(CellName)
      Sample<-CreateSeuratObject(counts = RawData[, cells.use], project = SampleList[i])
      sample.combined <- merge(sample.combined, Sample, add.cell.ids = NULL,merge.data = TRUE, na.rm = TRUE)
  }else{print('done')}
}
saveRDS(sample.combined, file="./data/SSC_Combine/raw.rds")

In [None]:
sample.combined <- readRDS("./data/SSC_Combine/object/all/raw.rds")
# Add Sample information
metadata  <- sample.combined@meta.data[,c(1,2,3)] %>%
  rownames_to_column(var = "Cell") %>%
  dplyr::mutate(SamNum = orig.ident) %>%
  left_join(SampleInfo, by = "SamNum") %>%
  tibble::column_to_rownames(var = "Cell")
sample.combined <- AddMetaData(object = sample.combined, metadata = metadata)
sample.combined@meta.data["percent.mt"] <- PercentageFeatureSet(sample.combined, pattern = "^MT-")

VlnPlot(sample.combined, features = c("nCount_RNA", "nFeature_RNA", "percent.mt"),pt.size=0, ncol=3) 
FeatureScatter(sample.combined, feature1 = "nFeature_RNA", feature2 = "percent.mt") + geom_smooth() #+xlim(0,5000)+ylim(0,2500)

# Remove low quality cells
sample.filter <- subset(sample.combined, subset= nCount_RNA > 400 & nFeature_RNA > 200 & percent.mt < 15)
sample.filter <- NormalizeData(sample.filter, normalization.method = "LogNormalize", scale.factor = 10000)

In [4]:
# CCA
sample.list <- SplitObject(sample.filter, split.by = "orig.ident")
sample.ssc <- lapply(X = sample.list, FUN = function(x) {
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 4000)#nfeatures可以改为4000去找一些不常见的细胞群
})
features <- SelectIntegrationFeatures(object.list = sample.ssc)
PbmcList <- lapply(X = sample.ssc, FUN = function(x) {
    x <- ScaleData(x, features = features, verbose = FALSE)
    x <- RunPCA(x, features = features, verbose = FALSE)
})
immune.anchors <- FindIntegrationAnchors(object.list = PbmcList, anchor.features = features, reduction = "cca")
immune.combined <- IntegrateData(anchorset = immune.anchors)
DefaultAssay(immune.combined) <- "integrated"
saveRDS(immune.combined, file="./data/SSC_Combine/cca_ssc.rds")

[1] 1


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”


[1] 2


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 3


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 4


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 5


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 6


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 7


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 8


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 9


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 10


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 11


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 12


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 13


“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 14


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“The following arguments are not used: na.rm”


[1] 15


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“The following arguments are not used: na.rm”


[1] 16


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 17


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 18


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 19


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 20


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 21


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 22


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 23


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 24


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


[1] 25


“'giveCsparse' has been deprecated; setting 'repr = "T"' for you”
“Feature names cannot have underscores ('_'), replacing with dashes ('-')”
“The following arguments are not used: na.rm”


In [5]:
# Calculate Cell State Scores
# --- proliferation
DefaultAssay(immune.combined) <- "RNA"
immune.combined <- CellCycleScoring(immune.combined, s.features = cc.genes$s.genes,
                                    g2m.features = cc.genes$g2m.genes, set.ident = TRUE)
DefaultAssay(immune.combined) <- "integrated"
var.features  <- VariableFeatures(immune.combined)
genes.cc  <- extract_cellcycle(immune.combined, var.features, cores =10, assay = "integrated", cutoff = 0.1)

# --- Interferon
library("GSEABase")
msigdb <- getBroadSets("./data/msigdb_v7.4.xml")
index <- sapply(msigdb, function(gs)
                bcCategory(collectionType(gs))=="c2")
geneset.c2 = msigdb[index]
geneset.interferon <- geneset.c2[["BROWNE_INTERFERON_RESPONSIVE_GENES"]]
interferon.genes <- geneset.interferon@geneIds
interferon.genes  <-  interferon.genes[interferon.genes %in% rownames(immune.combined)]
immune.combined <- AddModuleScore(
                 object = immune.combined,
                 features = list(interferon.genes),
                 ctrl = length(interferon.genes),
                 name = 'IFN.Score',
                 assay = "RNA")
genes.ifn  <- extract_ifn(immune.combined, var.features, cores =10, assay = "integrated", cutoff = 0.1)

# mitochondrion
genes.mt <- grep(pattern="^MT-",x=var.features ,value = T)

var.features.filtered  <- var.features[! var.features %in% c(cc.genes$g2m.genes, cc.genes$s.genes, genes.cc[1:100],
                                                             interferon.genes, genes.ifn[1:50],genes.mt)]


In [None]:
# Run the standard workflow for visualization and clustering
DefaultAssay(immune.combined) <- "integrated"
immune.cluster <- ScaleData(immune.combined, verbose = FALSE)
immune.cluster <- RunPCA(immune.cluster, npcs = 50, verbose = FALSE, features = var.features.filtered)
immune.cluster <- RunUMAP(immune.cluster, reduction = "pca", dims = 1:30)
immune.cluster <- FindNeighbors(immune.cluster, reduction = "pca", dims = 1:30)
immune.cluster <-  FindClusters(immune.cluster, resolution = c(0.1,0.2, 0.4, 0.8,1.2,1.6,2,3))
saveRDS(immune.cluster, file="./data/SSC_Combine/res_SSC.rds")

In [9]:
# --- Check the low quality cells
DefaultAssay(immune.cluster) <- "RNA"
#pdf(file="./result/QC/SSC_PBMC/nF_RNA.pdf")
FeaturePlot(immune.cluster, features=c("nFeature_RNA"))
#dev.off()
#pdf(file="./result/QC/SSC_PBMC/perMT_RNA.pdf")
FeaturePlot(immune.cluster, features=c("percent.mt"))
#dev.off()
FeaturePlot(immune.cluster, features=c("IFN.Score1"))

In [None]:
immune.cluster@meta.data %>%
  group_by(integrated_snn_res.1.6)  %>%
  summarise(gene = mean(nFeature_RNA), umi = mean(nCount_RNA), mt = mean(percent.mt))

In [11]:
# Visualized different res UMAPs
setwd("/share/home/qlab/projects/qlab_yrs/project_ssc/result/Umap/comDifRes")
pdf("res0.1.pdf",width=15)
Idents(immune.cluster) <- "integrated_snn_res.0.1"
DimPlot(immune.cluster, reduction = "umap",label=TRUE)+NoLegend()
dev.off()

pdf("res0.2.pdf",width=15)
Idents(immune.cluster) <- "integrated_snn_res.0.2"
DimPlot(immune.cluster, reduction = "umap",label=TRUE)+NoLegend()
dev.off()

pdf("res0.4.pdf",width=15)
Idents(immune.cluster) <- "integrated_snn_res.0.4"
DimPlot(immune.cluster, reduction = "umap",label=TRUE)+NoLegend()
dev.off()

pdf("res0.8.pdf",width=15)
Idents(immune.cluster) <- "integrated_snn_res.0.8"
DimPlot(immune.cluster, reduction = "umap",label=TRUE)+NoLegend()
dev.off()

pdf("res1.2.pdf",width=15)
Idents(immune.cluster) <- "integrated_snn_res.1.2"
DimPlot(immune.cluster, reduction = "umap",label=TRUE)+NoLegend()
dev.off()

pdf("res1.6.pdf",width=15)
Idents(immune.cluster) <- "integrated_snn_res.1.6"
DimPlot(immune.cluster, reduction = "umap",label=TRUE)+NoLegend()
dev.off()

pdf("res2.pdf",width=15)
Idents(immune.cluster) <- "integrated_snn_res.2"
DimPlot(immune.cluster, reduction = "umap",label=TRUE)+NoLegend()
dev.off()

pdf("res3.pdf",width=15)
Idents(immune.cluster) <- "integrated_snn_res.3"
DimPlot(immune.cluster, reduction = "umap",label=TRUE)+NoLegend()
dev.off()

In [None]:
# Calculate cluster top genes with different res
#immune.cluster <- readRDS(file="data/SSC_Combine/res_SSC.rds")
DefaultAssay(immune.cluster) <- "RNA"
# setwd("/share/home/qlab/projects/qlab_yrs/project_ssc/data/MrkGen/SSC_Combine")
#0.1, 0.2, 0.4, 0.8,1.2,1.6,2, 3
Idents(immune.cluster) <- "integrated_snn_res.0.1"
immune.combined.markers0.1 <- FindAllMarkers(immune.cluster, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.csv(immune.combined.markers0.1, file="ssc_marker0.1ALL.csv")

Idents(immune.cluster) <- "integrated_snn_res.0.2"
immune.combined.markers0.2 <- FindAllMarkers(immune.cluster, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.csv(immune.combined.markers0.2, file="ssc_marker0.2ALL.csv")

Idents(immune.cluster) <- "integrated_snn_res.0.4"
immune.combined.markers0.4 <- FindAllMarkers(immune.cluster, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.csv(immune.combined.markers0.4, file="ssc_marker0.4ALL.csv")


Idents(immune.cluster) <- "integrated_snn_res.0.8"
immune.combined.markers0.8 <- FindAllMarkers(immune.cluster, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.csv(immune.combined.markers0.8, file="ssc_marker0.8ALL.csv")


Idents(immune.cluster) <- "integrated_snn_res.1.2"
immune.combined.markers1.2 <- FindAllMarkers(immune.cluster, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.csv(immune.combined.markers1.2, file="ssc_marker1.2ALL.csv")


Idents(immune.cluster) <- "integrated_snn_res.1.6"
immune.combined.markers1.6 <- FindAllMarkers(immune.cluster, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.csv(immune.combined.markers1.6, file="ssc_marker1.6ALL.csv")


Idents(immune.cluster) <- "integrated_snn_res.2"
immune.combined.markers2 <- FindAllMarkers(immune.cluster, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.csv(immune.combined.markers2, file="ssc_marker2ALL.csv")


Idents(immune.cluster) <- "integrated_snn_res.3"
immune.combined.markers3 <- FindAllMarkers(immune.cluster, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
write.csv(immune.combined.markers3, file="ssc_marker3ALL.csv")

In [13]:
# Annotation
setwd("/share/home/qlab/projects/qlab_yrs/project_ssc/result/Annotation/SSC_Combined")
# Tcell
pdf("Tcell.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("CD3D","CD3E","CD8A","CD4"))
dev.off()

pdf("Tclassify.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("CCR7","GZMK","GZMB","FOXP3"))
dev.off()

pdf("gdT.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("TRDC","TRGV9","TRGV10"))
dev.off()

pdf("Bcell.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("JCHAIN","CD79A","MS4A1","IGHG1"))
dev.off()

pdf("Macrophage.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("CD68","CD163","CD14","FCGR3A"))
dev.off()

pdf("NK123_n4.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("FCG3RA","CX3CR1","FGFBP2","FCGR3B"))
dev.off()

pdf("cDC12_pDC34.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("CCL17","CD1C","CLEC9A","LILRA4"))
dev.off()

pdf("Unassign.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("PF4","PPBP","TPSB2"))
dev.off()

pdf("Macrophage_Lung_monocyte.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("SPP1","G0S2","CHI3L1"))
dev.off()

# CEC:ciliated epithelial cell; CC:club cell; AT: alveolar type
pdf("CEC_CC_AT1_AT2.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("FOXJ1","SCGB1A1","AGER","SFTPC"))
dev.off()

pdf("Mast_cell.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("TPSB2"))
dev.off()


pdf("LEC.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("CLDN5","PDPN","PROX1"))
dev.off()

# LBC:lung_basal_cell
pdf("myofibroblast_pericyte_LBC.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("MYH11","RGS5","KRT5"))
dev.off()

pdf("fibroblast.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("DCN","FGF7","MME"))
dev.off()

pdf("ciliated_epithelial_cell.pdf",width=15)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("CAPS"))
dev.off()

# fibroblast
fig.size(15,20)
DefaultAssay(immune.cluster) <- "RNA"
FeaturePlot(immune.cluster, features=c("SPINT2","CD14","LMCD1","FGFR4","FIGF")) #SPINT2hi
FeaturePlot(immune.cluster, features=c("MFAP5","CD34","THY1","SLPI","PLA2G2A")) #MFAP5hi
FeaturePlot(immune.cluster, features=c("WIF1","ITGA10")) #WIF1hi

# Monocyte --> Macrophage 
FeaturePlot(immune.cluster, features=c("FCN1","CCR2","CD68","CCL3")) #Monocyte 为 FCN1
FeaturePlot(immune.cluster, features=c("FN1","C1QA","CCL13","CCL18"))
FeaturePlot(immune.cluster, features=c("PPARG","FABP4","LYVE1","RGS1"))

In [None]:
# --- Added Cell CellType information
new.cluster.ids <- c("AT2",#0
                    "Alveolar_Mac",#1
                    "Monocyte_derived",#2
                    "Alveolar_Mac",#3
                    "Monocyte_derived",#4
                    "AT2",#5
                    "AT2",#6
                    "T_cell",#7
                    "Monocyte",#8
                    "Monocyte_derived",#9
                    "Monocyte_derived",#10
                    "AT2",#11
                    "NK_cell",#12
                    "Alveolar_Mac",#13
                    "Monocyte",#14
                    "AT2",#15
                    "Monocyte",#16
                    "AT2",#17
                    "AT1",#18
                    "T_cell",#19
                    "Endothelial_cell",#20
                    "Monocyte",#21
                    "Ciliated_epithelial_cell",#22
                    "Monocyte_derived",#23
                    "Fibroblast",#24
                    "Mast_cell",#25
                    "T_cell",#26
                    "Monocyte_derived",#27
                    "Endothelial_cell",#28
                    "Endothelial_cell",#29
                    "Club_cell",#30
                    "T_cell",#31
                    "AT1",#32
                    "Ciliated_epithelial_cell",#33
                    "Endothelial_cell",#34
                    "Endothelial_cell",#35
                    "cDC",#36
                    "Fibroblast",#37
                    "B_cell",#38
                    "LEC",#39
                    "Myofibroblast",#40
                    "AT2",#41
                    "Lung_basal_cell",#42
                    "Monocyte_derived",#43
                    "Pericyte",#44
                    "Plasma",#45
                    "Low_Quality",#46！！！
                    "AT2",#47
                    "AT2",#48
                    "Endothelial_cell",#49
                    "Monocyte",#50
                    "AT2",#51
                    "Alveolar_Mac"#52
                    )
Idents(immune.cluster) <- "integrated_snn_res.2"
names(new.cluster.ids) <- levels(immune.cluster)
immune.cluster <- RenameIdents(immune.cluster, new.cluster.ids)
immune.cluster$CellType <- Idents(immune.cluster)# 将注释好的细胞信息保存到 ssc@meta.data 中
# res3,cluster40 考虑排除掉
cells.use <- WhichCells(ssc_combine, expression = integrated_snn_res.3 == 40)
cells.use
ssc_combine@meta.data[cells.use, "CellType"] <- "Low_Quality"
saveRDS(ssc_combine, file="data/SSC_Combine/anno_SSC_Combine.rds")

pdf(file="/share/home/qlab/projects/qlab_yrs/project_ssc/result/Umap/SSC_Combine/Anno2.pdf", width=15)
Idents(immune.cluster) <- "CellType"
DimPlot(immune.cluster, reduction = "umap",label = TRUE) + NoLegend()
dev.off()

In [None]:
# --- Merge CellSubType Information
immune.cluster <- readRDS('anno_SSC_Combine.rds')
# --- define ct_levle1
meta.data <- immune.cluster@meta.data %>% rownames_to_column(var = 'barcode') %>% 
                .[,c('barcode', 'orig.ident', 'nCount_RNA', 'nFeature_RNA', 'nCount_AUC', 'nFeature_AUC', 'SamNum', 'SampleID',
                'Group', 'Site', 'Gender', 'Age', 'Cohort', 'percent.mt', 'S.Score', 'G2M.Score', 'Phase', 
                'old.ident', 'IFN.Score1', 'integrated_snn_res.0.1', 'integrated_snn_res.0.2', 'integrated_snn_res.0.4',
                'integrated_snn_res.0.8', 'integrated_snn_res.1.2', 'integrated_snn_res.1.6', 'integrated_snn_res.2',
                'integrated_snn_res.3', 'seurat_clusters', 'CellType', 'pANN', 'DF.classifications', 
                'cell', 'DF2', 'DF_hi.lo')]
meta.data$CellType <- as.character(meta.data$CellType)
meta.data$ct_level1 <- ''
meta.data$ct_level1[which(meta.data$CellType %in% c('AT2', 'AT1', 'Ciliated_epithelial_cell', 'Club_cell', 'Lung_basal_cell'))] <- 'Epithelial'
meta.data$ct_level1[which(meta.data$CellType %in% c('Endothelial_cell', 'LEC'))] <- 'Endothelial'
meta.data$ct_level1[which(meta.data$CellType %in% c('Alveolar_Mac', 'Monocyte_derived', 'Monocyte', 'Mast_cell', 'cDC'))] <- 'Myeloid'
meta.data$ct_level1[which(meta.data$CellType %in% c('NK_cell', 'T_cell' ))] <- 'Lymphocyte_T/NK'
meta.data$ct_level1[which(meta.data$CellType %in% c('B_cell', 'Plasma'))] <- 'Lymphocyte_B'
meta.data$ct_level1[which(meta.data$CellType %in% c('Fibroblast', 'Pericyte', 'Myofibroblast'))] <- 'Fibroblast'
# ---Merge ct_level2
at.meta <- readRDS('../at_trb/all_assays_samples.rds') %>% .@meta.data %>% rownames_to_column(var = 'barcode') %>% mutate(ct_level2 = AT_CellType) %>% .[,c('barcode', 'ct_level2')]
b.meta <- readRDS('../b/B_lung.rds') %>% .@meta.data %>% rownames_to_column(var = 'barcode') %>% mutate(ct_level2 = b_CellType) %>% .[,c('barcode', 'ct_level2')]
ec.meta <- readRDS('../ec/02_anno.rds') %>% .@meta.data %>% rownames_to_column(var = 'barcode') %>% mutate(ct_level2 = EC_CellType) %>% .[,c('barcode', 'ct_level2')]
fib.meta <- readRDS('../fib/fib_sm_pericyte.rds') %>% .@meta.data %>% rownames_to_column(var = 'barcode') %>% mutate(ct_level2 = fib_CellType) %>% .[,c('barcode', 'ct_level2')]
mono.meta <- readRDS('../mono/all_assay_samples.rds')  %>% .@meta.data %>% rownames_to_column(var = 'barcode') %>% mutate(ct_level2 = mo_CellType) %>% .[,c('barcode', 'ct_level2')]
t.meta <- readRDS('../t/T_lung.rds') %>% .@meta.data %>% rownames_to_column(var = 'barcode') %>% mutate(ct_level2 = t_CellType) %>% .[,c('barcode', 'ct_level2')]
ct.l2.meta <- rbind(at.meta, b.meta) %>% rbind(ec.meta) %>% rbind(fib.meta) %>% rbind(mono.meta) %>% rbind(t.meta)
meta.data <- left_join(meta.data, ct.l2.meta) %>% 
                # added Unclassified Subtypes
                mutate(ct_level2 = ifelse(CellType == 'Ciliated_epithelial_cell', 'Ciliated', ct_level2)) %>% 
                mutate(ct_level2 = ifelse(CellType == 'Club_cell', 'Club', ct_level2)) %>% 
                mutate(ct_level2 = ifelse(CellType == 'Lung_basal_cell', 'Basal', ct_level2)) %>% 
                mutate(ct_level2 = ifelse(CellType == 'LEC', 'LymphEC', ct_level2)) %>% 
                mutate(ct_level2 = ifelse(CellType == 'Mast_cell', 'Mast', ct_level2)) %>% 
                mutate(ct_level2 = ifelse(CellType == 'cDC', 'cDC', ct_level2)) %>%
                mutate(ct_level2 = ifelse(is.na(ct_level2), '', ct_level2)) 
immune.cluster@meta.data <- meta.data %>% column_to_rownames(var = 'barcode')
saveRDS(immune.cluster, 'ssc_lung_allCT.rds')

In [None]:
# --- Rename Phenotype
immune.cluster <- readRDS('ssc_lung_allCT.rds')
immune.cluster@meta.data$CellType <- as.character(immune.cluster@meta.data$CellType)
immune.cluster@meta.data$CellType[which(immune.cluster@meta.data$CellType == 'MD_Mac')] <- 'Md_Mac'
immune.cluster@meta.data$CellType[which(immune.cluster@meta.data$CellType == 'T_cell')] <- 'Tcell'
immune.cluster@meta.data$CellType[which(immune.cluster@meta.data$CellType == 'NK_cell')] <- 'NK'
immune.cluster@meta.data$CellType[which(immune.cluster@meta.data$CellType == 'B_cell')] <- 'Bcell'
immune.cluster@meta.data$CellType[which(immune.cluster@meta.data$CellType == 'Endothelial')] <- 'EC'
immune.cluster@meta.data$CellType[which(immune.cluster@meta.data$CellType == 'Mast_cell')] <- 'Mast'
immune.cluster@meta.data$CellType[which(immune.cluster@meta.data$CellType == 'Lymph_EC')] <- 'LymEC'
immune.cluster@meta.data$CellType[which(immune.cluster@meta.data$CellType == 'Smooth_muscle')] <- 'SMC'

ct.res <- data.frame(CellType = immune.cluster@meta.data$CellType %>% as.character() %>% unique(),
                     celltype.res.0.1 = 0:(length(immune.cluster@meta.data$CellType %>% as.character() %>% unique()) - 1))
meta <- immune.cluster@meta.data
immune.cluster@meta.data <- meta %>% 
                                rownames_to_column(var = 'cell') %>%
                                left_join(ct.res, by = 'CellType') %>%
                                column_to_rownames(var = 'cell')
saveRDS(immune.cluster, 'ssc_lung_allCT.rds')