In [None]:
library(Seurat)
library(dplyr)
library(patchwork)
# library(readr)
library(ggplot2)
# On a cloud server, parallel execution can be enabled through the future
# package.
# NOTE(review): the original note said 4 threads were used, but the code below
# requests 40 workers -- confirm which value is intended.
library(future)
library(qs)
# check the current active plan
plan()
# change the current plan to access parallelization
plan("multisession", workers =40)
plan()

# Set the memory limit (future.globals.maxSize); currently disabled.
# options(future.globals.maxSize = 4 * 1024^3)
# Switch back to a sequential plan, replacing the multisession plan requested
# above, then print the active plan to confirm.
# NOTE(review): as written, everything after this point runs sequentially --
# confirm the multisession plan was meant to be dropped.
plan("sequential")
future::plan()

### 质量控制并确定变异基因

In [None]:
# Load the Seurat object with qs::qread() (a fast reader) and report how long
# the load takes. Presumably this is the merged raw-count object from step 1,
# judging by the file name.
library(qs)
system.time(
  seurat.data <- qread(file = "../Outdata/Step1.RawCount_merged_seurat.qs")
)

In [None]:
## Keep only part of the data: cells whose `group` metadata value is "ALI".
seurat.data <- subset(x = seurat.data, subset = group %in% "ALI")

In [None]:
# Compute, per cell, the percentage of counts coming from features whose names
# match "^mt-" and store it in the `percent.mt` metadata column.
seurat.data[["percent.mt"]] <- PercentageFeatureSet(
  object = seurat.data,
  pattern = "^mt-"
)

# Save per-group violin plots of the three QC metrics.
pdf(file = "04.featureViolin.pdf", width = 10, height = 6)
print(
  VlnPlot(
    object = seurat.data,
    features = c("nFeature_RNA", "nCount_RNA", "percent.mt"),
    ncol = 3,
    group.by = "group"
  )
)
dev.off()

# Filter the data: keep cells with more than 50 detected features and less
# than 5% counts from "^mt-" features.
seurat.data <- subset(
  x = seurat.data,
  subset = nFeature_RNA > 50 & percent.mt < 5
)


In [None]:
# Sequencing-depth correlation plots, saved side by side in one PDF:
#   plot1 - nCount_RNA vs percent.mt
#   plot2 - nCount_RNA vs nFeature_RNA
plot1 <- FeatureScatter(
  object = seurat.data,
  feature1 = "nCount_RNA",
  feature2 = "percent.mt",
  pt.size = 1.5,
  group.by = "group"
)
# The original call had a stray empty argument (",,") after feature2; it was
# silently matched to the next positional formal and has been removed.
plot2 <- FeatureScatter(
  object = seurat.data,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA",
  pt.size = 1.5,
  group.by = "group"
)

# Open the device only once both plots exist, so a failure above cannot leave
# a dangling PDF device behind.
pdf(file = "04.featureCor.pdf", width = 10, height = 6)
# patchwork::wrap_plots() replaces the deprecated Seurat::CombinePlots().
# The explicit print() draws the figure even when this code is source()d.
print(wrap_plots(plot1, plot2, ncol = 2))
dev.off()


In [None]:
# Normalise the data ("LogNormalize", scale factor 10000), then pick the 2000
# features that vary most across cells using the "vst" method.
seurat.data <- seurat.data %>%
  NormalizeData(normalization.method = "LogNormalize", scale.factor = 10000) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 2000)



In [None]:
# Variable-feature plot, with and without labels for the first 10 variable
# features, saved side by side in one PDF.
top10 <- head(x = VariableFeatures(object = seurat.data), n = 10)
plot1 <- VariableFeaturePlot(object = seurat.data)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)

# Open the device only once both plots exist, so a failure above cannot leave
# a dangling PDF device behind.
pdf(file = "04.featureVar.pdf", width = 10, height = 6)
# patchwork::wrap_plots() replaces the deprecated Seurat::CombinePlots().
# The explicit print() draws the figure even when this code is source()d.
print(wrap_plots(plot1, plot2, ncol = 2))
dev.off()

In [None]:
# Number of principal components to compute; also used as the number of
# dimensions passed to the downstream steps.
pcSelect <- 20

## PCA
# Standard pre-processing step before PCA.
seurat.data <- ScaleData(seurat.data)

# Run PCA on the variable features. The feature set is passed through
# `features`; the previous `pc.genes` argument comes from the Seurat v2 API and
# is not a RunPCA parameter in current releases, so it was silently ignored.
seurat.data <- RunPCA(
  object = seurat.data,
  npcs = pcSelect,
  features = VariableFeatures(object = seurat.data)
)


In [None]:
# UMAP on the first `pcSelect` dimensions of the "pca" reduction.
seurat.data <- RunUMAP(
  seurat.data,
  reduction = "pca",
  dims = 1:pcSelect,
  verbose = FALSE
)

### 去批次

In [None]:
### 3.2 Check for batch effects
# Notebook display size for the figure below.
options(repr.plot.width = 10, repr.plot.height = 4.5)

# PCA and UMAP embeddings coloured by sample, side by side with one shared
# legend.
p1.compare <- wrap_plots(
  DimPlot(seurat.data, reduction = "pca", group.by = "sampleID") +
    NoAxes() +
    ggtitle("Before_PCA"),
  DimPlot(seurat.data, reduction = "umap", group.by = "sampleID") +
    NoAxes() +
    ggtitle("Before_UMAP"),
  ncol = 2,
  guides = "collect"
)
p1.compare

In [None]:
### 4.1 RunHarmony (runs fairly quickly on a small number of samples)
library(harmony)
# Run harmony using "sampleID" as the grouping variable, with the convergence
# plot enabled.
seurat.data <- RunHarmony(seurat.data, "sampleID", plot_convergence = TRUE)

In [None]:
### After batch correction, UMAP has to be run again on the harmony embedding
seurat.data <- RunUMAP(
  seurat.data,
  reduction = "harmony",
  dims = 1:pcSelect,
  verbose = FALSE
)

In [None]:
# Harmony and UMAP embeddings coloured by sample, side by side with one shared
# legend.
p2.compare <- wrap_plots(
  DimPlot(seurat.data, reduction = "harmony", group.by = "sampleID") +
    NoAxes() +
    ggtitle("After_PCA (harmony)"),
  DimPlot(seurat.data, reduction = "umap", group.by = "sampleID") +
    NoAxes() +
    ggtitle("After_UMAP"),
  ncol = 2,
  guides = "collect"
)
# p2.compare

# Notebook display size, then stack the "before" row above the "after" row.
options(repr.plot.width = 10, repr.plot.height = 9)
wrap_plots(p1.compare, p2.compare, ncol = 1)


In [None]:
# Write the stacked before/after comparison to a PDF.
# NOTE(review): the output file name is spelled "after_hamrmony"; it is kept
# unchanged here in case something else relies on it.
pdf(file = "after_hamrmony.pdf", width = 10, height = 9)
print(wrap_plots(p1.compare, p2.compare, ncol = 1))
dev.off()

### 找合适的resolution

In [None]:
# Compare the clustering results obtained at several resolutions.
# The neighbour graph is built once from the harmony embedding; FindClusters
# is then run once per resolution, printing each resolution as it goes.
seurat.data <- FindNeighbors(
  object = seurat.data,
  reduction = "harmony",
  dims = 1:pcSelect
)
for (res in c(0.05, 0.1, 0.2, 0.3, 0.5, 0.8, 1, 1.2, 1.4, 1.5, 2)) {
  print(res)
  seurat.data <- FindClusters(
    object = seurat.data,
    resolution = res,
    algorithm = 1
  )
}

In [None]:
# Notebook display size for the grid below.
options(repr.plot.width = 20, repr.plot.height = 8)

# UMAP visualisation: one labelled panel per clustering resolution, laid out
# in a grid with 5 columns.
cluster_umap <- wrap_plots(
  lapply(
    paste0(
      "RNA_snn_res.",
      c("0.05", "0.1", "0.2", "0.3", "0.5", "0.8", "1", "1.2", "1.4", "1.5")
    ),
    function(res_col) {
      DimPlot(
        seurat.data,
        reduction = "umap",
        group.by = res_col,
        label = TRUE
      ) & NoAxes()
    }
  ),
  ncol = 5
)
cluster_umap

In [None]:
# Use the clustering stored in the `RNA_snn_res.1` metadata column as the
# active identities.
Idents(object = seurat.data) <- "RNA_snn_res.1"

In [None]:
# Save a labelled UMAP to PDF.
# No `group.by` is given, so DimPlot colours cells by the object's active
# identities (Idents), which are not necessarily the `seurat_clusters` column.
pdf(file = "07.Umap_1.pdf", width = 6.5, height = 6)
print(DimPlot(seurat.data, reduction = "umap", label = TRUE) & NoAxes())
dev.off()

In [None]:
# Save the clustered object with qs (fast) and report the time taken.
# NOTE(review): this writes under "./Outdata" while the raw object was read
# from "../Outdata" -- confirm the two locations are meant to differ.
# install.packages('qs')
library(qs)
system.time(
  qsave(seurat.data, file = "./Outdata/Cluster_no_annotion.qs")
)

### 主要细胞类型注释

In [None]:
# Read the clustered data that has not been annotated yet.
seurat.data <- qread(file = "./Outdata/Cluster_no_annotion.qs")

In [None]:
# Use the clustering stored in the `RNA_snn_res.1` metadata column as the
# active identities.
Idents(object = seurat.data) <- "RNA_snn_res.1"

# Notebook display size for the DotPlot below.
options(repr.plot.width = 15, repr.plot.height = 8)

# Marker panel, grouped by the cell type each set of genes is meant to flag.
# The flattened vector keeps the genes in exactly this order.
# NOTE(review): "Fcgr3b" and "Clec4c" look like human gene symbols while the
# rest of the panel is mouse-style -- confirm they exist in this dataset.
check_genes <- unlist(
  list(
    macrophage = c("Adgre1", "Fcgr1", "Cd68"),
    mast_cell = c("Cpa3", "Hpgds", "Ms4a2"),
    dc = c("Clec10a", "Clec4c"),
    # Myeloid cells; need further splitting into macrophages / neutrophils
    myeloid = c("Retnlg", "Fcer1g", "Cd14"),
    fibroblast = c("Col1a1", "Col1a2", "Dcn"),
    epithelial = c("Epcam", "Cdh1", "Krt18"),
    # "Mki67" was previously part of this set and is currently left out
    neutrophil = c("S100a9", "S100a8", "Csf3r", "Fcgr3b"),
    b_cell = c("Cd79a", "Ms4a1", "Cd19", "Igkc"),
    smooth_muscle = c("Acta2", "Myh11"),
    t_cell = c("Cd3d", "Cd3g"),
    nk_cell = c("Nkg7", "Gzma", "Ccl5"),
    platelet = c("Ppbp", "Gp1bb"),
    endothelial = c("Cldn5", "Pecam1", "Ramp2")
  ),
  use.names = FALSE
)

# DotPlot of the marker panel across the active identities, with genes on the
# vertical axis after the coordinate flip.
DotPlot(
  object = seurat.data,
  features = check_genes,
  assay = "RNA",
  scale = TRUE
) +
  coord_flip()

In [None]:
# Marker panel used to decide the annotation below. It is defined again here,
# so `check_genes` is available even if only this cell is run.
check_genes <- c(
  "Adgre1", "Fcgr1", "Cd68",               # Macrophages
  "Cpa3", "Hpgds", "Ms4a2",                # Mast cells
  "Clec10a", "Clec4c",                     # DC cells
  "Retnlg", "Fcer1g", "Cd14",              # Myeloid cells; to be split further
                                           # into macrophages / neutrophils
  "Col1a1", "Col1a2", "Dcn",               # Fibroblasts
  "Epcam", "Cdh1", "Krt18",                # Epithelial cells
  "S100a9", "S100a8", "Csf3r", "Fcgr3b",   # Neutrophils ("Mki67" disabled)
  "Cd79a", "Ms4a1", "Cd19", "Igkc",        # B cells
  "Acta2", "Myh11",                        # Smooth muscle cells
  "Cd3d", "Cd3g",                          # T cells
  "Nkg7", "Gzma", "Ccl5",                  # NK cells
  "Ppbp", "Gp1bb",                         # Platelets
  "Cldn5", "Pecam1", "Ramp2"               # Endothelial cells
)

### Assign cell-type names to clusters
# Cluster IDs grouped by the cell type they are annotated as. The "# CRL" tags
# are carried over from the original annotation lines (B cells had none).
cluster_groups <- list(
  "Neutrophils"         = c(0, 5, 8, 9, 15, 22, 23),   # CRL
  "Macrophage"          = c(2, 4, 11, 13, 17, 19, 33), # CRL
  "Other Myeloid cells" = c(27),                       # CRL
  "Fibroblast"          = c(10, 14, 16, 25),           # CRL
  "Epithelial cells"    = c(24, 28, 29, 31),           # CRL
  "B cells"             = c(6, 20, 30, 34, 36),
  "Smooth muscle cells" = c(26),                       # CRL
  "T cells"             = c(3, 21),                    # CRL
  "NK cells"            = c(1, 18, 38),                # CRL
  "Endothelial cells"   = c(7, 12, 35, 37),            # CRL
  "Platelets"           = c(32)                        # CRL
)

annotated_ids <- unlist(cluster_groups, use.names = FALSE)
annotated_types <- rep(names(cluster_groups), times = lengths(cluster_groups))
# A cluster listed under two cell types would be ambiguous; fail loudly.
if (anyDuplicated(annotated_ids) > 0) {
  stop("A cluster ID is assigned to more than one cell type.", call. = FALSE)
}

# Lookup table with one row per cluster ID (0..38). Clusters without an
# annotation keep the placeholder string "NA".
celltype <- data.frame(
  ClusterID = 0:38,
  celltype_main = "NA",
  stringsAsFactors = FALSE
)
table_hit <- match(celltype$ClusterID, annotated_ids)
celltype$celltype_main[!is.na(table_hit)] <-
  annotated_types[table_hit[!is.na(table_hit)]]

# Map every cell's active identity to its cell type with one vectorised
# lookup. The column is addressed by its full name `celltype_main`; the
# previous per-cluster loop read `celltype$celltype`, which only resolved
# through `$` partial matching.
# NOTE(review): this assumes the active identities are the clustering that the
# IDs above refer to (RNA_snn_res.1) -- confirm before running out of order.
cell_clusters <- as.character(Idents(seurat.data))
cell_row <- match(cell_clusters, as.character(celltype$ClusterID))
cell_labels <- celltype$celltype_main[cell_row]

# Cells whose cluster is missing from the table keep the "NA" placeholder
# (same outcome as before), but are now reported instead of passing silently.
not_in_table <- is.na(cell_row)
if (any(not_in_table)) {
  warning(
    "Clusters without a cell-type annotation: ",
    paste(unique(cell_clusters[not_in_table]), collapse = ", "),
    call. = FALSE
  )
  cell_labels[not_in_table] <- "NA"
}

seurat.data@meta.data$celltype <- cell_labels
table(seurat.data@meta.data$celltype)

In [None]:
# Save a labelled UMAP coloured by the `celltype` annotation.
pdf(file = "09.Umap_annotion_1.pdf", width = 12, height = 10)
print(
  DimPlot(
    seurat.data,
    reduction = "umap",
    group.by = "celltype",
    label = TRUE
  ) & NoAxes()
)
dev.off()

In [None]:
# Peek at the metadata, make the `celltype` column the active identities, and
# tabulate how many cells carry each label.
head(x = seurat.data@meta.data)
Idents(object = seurat.data) <- "celltype"
table(seurat.data$celltype)

In [None]:
## 2.5 Save the data
qsave(x = seurat.data, file = "./Outdata/Step3.Cluster_annotion.qs")

### 按照指定细胞类型顺序绘制Marker基因图

In [None]:
# Reload the object saved as Step3.Cluster_annotion.qs.
seurat.data <- qread(file = "./Outdata/Step3.Cluster_annotion.qs")

In [None]:
# Number of cells per `celltype` label.
table(seurat.data$celltype)

In [None]:
# Order in which the cell types should appear in the marker DotPlot.
celltype_order <- c(
  "Endothelial cells", "Platelets", "NK cells", "T cells",
  "Smooth muscle cells", "B cells", "Neutrophils", "Epithelial cells",
  "Fibroblast", "Macrophage", "Other Myeloid cells"
)

# Make `celltype` a factor whose levels follow `celltype_order`; any label not
# listed there becomes NA.
seurat.data$celltype <- factor(seurat.data$celltype, levels = celltype_order)

# DotPlot of the marker panel grouped by cell type, with genes on the vertical
# axis after the coordinate flip and the other axis' labels rotated 45 degrees.
# `check_genes` is expected to exist already in the session.
p <- DotPlot(
  object = seurat.data,
  features = check_genes,
  assay = "RNA",
  scale = TRUE,
  group.by = "celltype"
)
p <- p +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Save the figure.
pdf(file = "06.markerBubble.reordered.pdf", width = 10, height = 10)
print(p)
dev.off()
