In [1]:
library(Seurat)
library(ggplot2)
library(dplyr)
library(scutilsR)
library(tidyverse)
library(celda)
library(sceasy)
library(enrichR)
library(Nebulosa)
library(magrittr)
library(SeuratWrappers)
library(glue)
library(data.table)
library(ggsci)

Loading required package: SeuratObject

Loading required package: sp


Attaching package: ‘SeuratObject’


The following objects are masked from ‘package:base’:

    intersect, t



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.6.0
✔ lubridate 1.9.4     ✔ tibble    3.3.0
✔ purrr     1.1.0     ✔ tidyr     1.3.1
✔ readr     2.1.5     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

In [21]:
#### Load data ####
# Every immediate sub-directory of solo_matrix is treated as one sample;
# only the directory names (not full paths) are kept.
samples <- list.dirs(
  "/home/data/tanglei/project/prostate_altas/data/solo_matrix",
  full.names = FALSE,
  recursive = FALSE
)

# Build one Seurat object per sample from its "filtered" matrix directory.
seu.list <- pbapply::pblapply(samples, function(sample_dir) {
  mat <- Read10X(
    file.path(
      "/home/data/tanglei/project/prostate_altas/data/solo_matrix/",
      sample_dir,
      "filtered"
    )
  )
  # Note: "_" has a special meaning in `CreateSeuratObject()`, so it is
  # replaced by "-" in the sample tag before the tag is prefixed to barcodes.
  sample_tag <- gsub("_", "-", sample_dir)
  colnames(mat) <- paste(sample_tag, colnames(mat), sep = "_")
  CreateSeuratObject(counts = mat)
})

## Merge all samples into one object
seu <- base::Reduce(f = merge, x = seu.list)

In [3]:
## Coarse filter: keep only cells with nFeature_RNA > 500
seu <- subset(seu, subset = nFeature_RNA > 500)

In [4]:
# Continue under the name `scobj` and drop the intermediate objects.
scobj <- seu
rm(seu, seu.list)

In [5]:
# Join the layers of the merged object.
scobj <- JoinLayers(scobj)

In [None]:
## Compute QC metrics
## Mitochondrial genes: features whose names match "^MT-"
scobj <- PercentageFeatureSet(
  scobj,
  pattern = "^MT-",
  col.name = "percent.mt"
)

In [None]:
## Cell cycle
# Normalize first, then score with the S and G2/M gene sets taken from
# cc.genes.updated.2019.
scobj <- NormalizeData(scobj)
s.genes <- cc.genes.updated.2019[["s.genes"]]
g2m.genes <- cc.genes.updated.2019[["g2m.genes"]]
scobj <- CellCycleScoring(
  object = scobj,
  s.features = s.genes,
  g2m.features = g2m.genes
)

In [None]:
## Doublets (Seurat v5 uses layers; the @counts slot no longer exists)
library(scDblFinder)
library(SingleCellExperiment)

# Works with Seurat v4/v5: prefer reading counts from a layer; if several
# counts.* layers exist, merge them first.
counts <- NULL
rna <- scobj[["RNA"]]

layers <- tryCatch(SeuratObject::Layers(rna), error = function(e) NULL)
if (!is.null(layers)) {
  cnt_layers <- grep("^counts", layers, value = TRUE)
  if (length(cnt_layers) == 1) {
    counts <- SeuratObject::GetAssayData(scobj, assay = "RNA", layer = cnt_layers[[1]])
  } else if (length(cnt_layers) > 1) {
    if ("JoinLayers" %in% getNamespaceExports("SeuratObject")) {
      # JoinLayers() requires `layers` and `new` to have the same length; a
      # repeated `new` groups all counts.* layers into a single "counts" layer.
      scobj <- SeuratObject::JoinLayers(
        scobj,
        assay = "RNA",
        layers = cnt_layers,
        new = rep("counts", length(cnt_layers))
      )
      counts <- SeuratObject::GetAssayData(scobj, assay = "RNA", layer = "counts")
    } else {
      counts_list <- lapply(cnt_layers, function(ly) SeuratObject::GetAssayData(scobj, assay = "RNA", layer = ly))
      counts <- Reduce(Matrix::cbind2, counts_list)
    }
  }
}

if (is.null(counts)) {
  # Seurat v4 fallback (slot syntax)
  counts <- SeuratObject::GetAssayData(scobj, assay = "RNA", slot = "counts")
}

# The results are written back to `scobj` by position, so the count matrix
# must follow the cell order of `scobj` (the cbind2 fallback above does not
# guarantee it). Reorder only when needed to avoid copying a large matrix.
if (!identical(colnames(counts), colnames(scobj))) {
  counts <- counts[, colnames(scobj), drop = FALSE]
}

# scDblFinder is stochastic; fix the seed so the calls are reproducible.
set.seed(123)
sce <- SingleCellExperiment(list(counts = counts))
# The object holds several merged samples. Passing `samples` makes scDblFinder
# search for doublets within each sample instead of across all cells pooled
# together (orig.ident carries the per-sample tag used elsewhere in this file).
sce <- scDblFinder(sce, samples = as.character(scobj$orig.ident))

scobj$scDblFinder.class <- sce$scDblFinder.class
scobj$scDblFinder.score <- sce$scDblFinder.score

rm(sce, counts, rna, layers)

In [None]:
## QC score
# Per-cell score: column sum of log1p(expression) over the QC gene set.
QC_genes <- c("ACADL", "AP2S1", "ATP5F1D", "ATP5F1E", "ATP5MC1", "ATP5MC3", 
              "ATP5PF", "ATP5MF", "ATP5ME", "ATP5MG", "ATP6V1F", 
              "CHCHD10", "COA3", "COX5B", "COX6A1", "COX6B1", 
              "COX6C", "COX7A2", "COX7A2L", "COX7B", "CYCS", 
              "EDF1", "EEF1B2", "EIF5A", "FAU", "FKBP3", "FTL", 
              "GUK1", "HEPH", "HRAS", "MIF", "MRAP", "NACA", 
              "NDUFA1", "NDUFA2", "NDUFA4", "NDUFA5", "NDUFB7", 
              "NDUFC1", "NDUFS7", "NDUFV3", "NECAP1", "NLRP4", 
              "PDXP", "PFN2", "POLR2M", "RAB3A", "RTL8A", 
              "SLC16A2", "SNRPD2", "SNU13", "TAF1C", "TIMM8B", 
              "TPT1", "UBB", "UQCR11", "UQCRB", "UQCRQ", "USP50")
# `layer` replaces the deprecated `slot` argument in Seurat v5.
expr_matrix <- GetAssayData(scobj, assay = "RNA", layer = "data")

# Indexing with a gene that is absent from the matrix raises
# "subscript out of bounds", so restrict the set to genes that are present
# (the other gene-set cells in this file do the same via intersect()).
QC_genes_present <- intersect(QC_genes, rownames(expr_matrix))
QC_genes_missing <- setdiff(QC_genes, QC_genes_present)
if (length(QC_genes_present) == 0) {
  stop("None of the QC genes are present in the RNA assay", call. = FALSE)
}
if (length(QC_genes_missing) > 0) {
  warning(
    "QC genes not found and skipped: ",
    paste(QC_genes_missing, collapse = ", "),
    call. = FALSE
  )
}

# NOTE(review): the "data" layer was produced by NormalizeData() earlier in
# this file, so log1p() here is applied on top of normalized values -
# confirm that this second transform is intended.
log_expr_values <- log1p(expr_matrix[QC_genes_present, , drop = FALSE])
# Matrix::colSums is used explicitly so that a sparse matrix is handled
# regardless of which packages are attached.
qc_scores <- Matrix::colSums(log_expr_values, na.rm = TRUE)
scobj$qc_score <- qc_scores

In [10]:
## Stress
# Gene set used for the stress percentage (variable names kept as-is).
hot_shock <- c("FOS", "CXCL2", "ZFP36", "FOSB", "DUSP1", "ATF3", "CXCL8", 
  "NR4A1", "CXCL3", "PPP1R15A", "JUNB", "EGR1", "HSPA1A", "HSPA1B", 
  "SOCS3", "KLF6", "JUN", "IER2", "CXCL1", "NFKBIA", "HSPA6", "DNAJB1", 
  "IER3", "CCNL1", "MTRNR2L2", "IER5", "ID1", "CEBPD", "KRT6A", 
  "CYR61", "DEPP1", "CLDN4", "IRF1", "DUSP2", "BTG2", "PLAUR", 
  "MAFF", "KLF4", "PHLDA2", "TNFAIP3", "ACTG1", "BTG1", "DNAJB4", 
  "ERRFI1", "H3F3B", "HSPB1", "PCF11", "PXDC1", "SDC4", "SRF", 
  "TPM3", "USP2", "GADD45G", "ANKRD1", "FAM132B", "HIPK3", "HSPH1", 
  "IRF8", "KLF9", "NFKBIZ", "PDE4B", "RAP1B", "SERPINE1", "TPPP3", 
  "WAC", "HSPE1", "ARID5A", "DCN", "DUSP8", "HSP90AA1", "ID3", 
  "ITPKC", "LITAF", "NOP58", "PER1", "RASSF1", "SKIL", "SRSF7", 
  "TRA2A", "ZC3H12A", "CCRN4L", "DDX3X", "HSP90AB1", "IDI1", "LMNA", 
  "MYADM", "NPPC", "PHLDA1", "RHOB", "SLC10A6", "STAT3", "TRA2B", 
  "ZFAND5", "KCNE4", "ATF4", "CEBPB", "DDX5", "EGR2", "FOSL2", 
  "MYC", "PNP", "RHOH", "SLC38A2", "TAGLN2", "TRIB1", "BAG3", "DES", 
  "GADD45A", "JUND", "MAFK", "MYD88", "ODC1", "PNRC1", "RIPK1", 
  "SLC41A1", "TIPARP", "TUBB4B", "ZFP36L1", "BHLHE40", "CEBPG", 
  "DNAJA1", "EIF5", "GCC1", "HSPA5", "IFRD1", "KLF2", "MCL1", "NCKAP5L", 
  "OSGIN1", "SAT1", "TUBB6", "ZFP36L2", "BRD2", "CSRNP1", "ERF", 
  "GEM", "HSPA8", "IL6", "MIDN", "NCOA7", "OXNAD1", "SBNO2", "SQSTM1", 
  "TNFAIP6", "UBC", "ZYX", "MIR22HG", "MT1A", "SRSF5", "MT2A", 
  "EIF1", "PPP1CC", "ACTB", "ADAMTS1", "ADAMTS9", "AHNAK", "ANKRD11", 
  "ARF4", "AZIN1", "BAIAP2", "BAZ1A", "CAMK1D", "CCDC138", "CDKN1A", 
  "CHD4", "CHKA", "CLIC4", "CMSS1", "COL1A1", "CTNNB1", "CX3CR1", 
  "ELF2", "EP400", "ERN1", "ETF1", "FBXL18", "FLT1", "GADD45B", 
  "GLS", "GNAS", "GSK3A", "GSN", "HIVEP2", "INTS6", "JAK1", "JDP2", 
  "KDM6B", "KPNA1", "LSMEM1", "LUZP1", "MAGI3", "MAN1A1", "MAPKAPK2", 
  "MAPRE1", "MED13", "MSN", "MYLIP", "NABP1", "NASP", "NUFIP2", 
  "NUP210L", "PEAK1", "PECAM1", "POLG2", "PPP1CB", "PRKCG", "RNF19B", 
  "RTN4", "SERTAD2", "SGPL1", "SIK3", "SPAG9", "TAF4B", "TEX14", 
  "TOB2", "TOP1", "DIAPH1", "NEAT1", "PTMA", "ARIH1")
# Only genes that exist in the object are passed on; the rest are dropped.
hot_shock_genes <- intersect(hot_shock, rownames(scobj))
scobj <- PercentageFeatureSet(
  scobj,
  features = hot_shock_genes,
  col.name = "percent.stress"
)

In [11]:
## Erythrocyte genes
erythrocyte_genes <- intersect(
  c(
    "GATA1",    # erythroid transcription factor
    "EPOR",     # erythropoietin receptor
    "HBB",      # hemoglobin beta chain
    "HBA1",     # hemoglobin alpha chain 1
    "HBA2",     # hemoglobin alpha chain 2
    "KLF1",     # key transcription factor of erythroid development
    "SLC4A1",   # erythrocyte membrane protein
    "EKLF",     # erythroid-specific transcription factor
    "ALAS2",    # erythroid delta-aminolevulinate synthase
    "TAL1"      # T-cell leukemia/lymphoma protein 1
  ),
  rownames(scobj)
)
# Percentage of counts coming from the erythrocyte genes found in the object.
scobj <- PercentageFeatureSet(
  scobj,
  features = erythrocyte_genes,
  col.name = "percent.ery"
)

In [12]:
## Ribosomal genes
# Gene list comes from ProjectSVR::ribo.genes, restricted to genes present
# in the object.
ribo.genes <- intersect(ProjectSVR::ribo.genes, rownames(scobj))
scobj <- PercentageFeatureSet(
  scobj,
  features = ribo.genes,
  col.name = "percent.ribo"
)

In [None]:
## RNA contamination (Seurat v5 compatible)
library(SingleCellExperiment)

# Run a default clustering workflow on one Seurat object and return it:
# normalize, pick 2000 variable features, scale, PCA, neighbour graph on
# PCA dims 1:30, then cluster.
QuickCluster <- function(object) {
  object %>%
    NormalizeData() %>%
    FindVariableFeatures(nfeatures = 2000) %>%
    ScaleData() %>%
    RunPCA() %>%
    FindNeighbors(reduction = "pca", dims = 1:30) %>%
    FindClusters()
}

# Seurat v4/v5 compatible extraction of the counts matrix.
#
# obj:   Seurat object.
# assay: name of the assay to read (default "RNA").
#
# Returns the counts matrix of `assay`. When the assay reports exactly one
# layer whose name starts with "counts" that layer is returned; when it
# reports several, they are joined into a single "counts" layer first; when
# layers cannot be listed or none matches, the slot-based accessor is used.
get_counts_layer <- function(obj, assay = "RNA") {
  assay_obj <- obj[[assay]]
  layer_names <- tryCatch(
    SeuratObject::Layers(assay_obj),
    error = function(e) NULL
  )

  count_layers <- if (is.null(layer_names)) {
    character(0)
  } else {
    grep("^counts", layer_names, value = TRUE)
  }

  # Seurat v4 fallback
  if (length(count_layers) == 0) {
    return(SeuratObject::GetAssayData(obj, assay = assay, slot = "counts"))
  }

  if (length(count_layers) == 1) {
    return(
      SeuratObject::GetAssayData(obj, assay = assay, layer = count_layers[[1]])
    )
  }

  # Several counts.* layers: join them under the single name "counts".
  joined <- SeuratObject::JoinLayers(
    obj,
    assay = assay,
    layers = count_layers,
    new = rep("counts", length(count_layers))
  )
  SeuratObject::GetAssayData(joined, assay = assay, layer = "counts")
}

# Split by sample and cluster each sample on its own.
seu.list <- lapply(SplitObject(scobj, split.by = "orig.ident"), QuickCluster)

# Concatenate the per-sample cluster assignments and store them on `scobj`,
# looked up by cell name.
clusters <- base::Reduce(
  c,
  lapply(seu.list, function(xx) xx$seurat_clusters)
)
scobj$quick_clusters <- clusters[colnames(scobj)]

# Run decontX sample by sample to avoid compatibility problems between older
# functions and Seurat v5 slots.
contam_list <- lapply(names(seu.list), function(sn) {
  sample_obj <- seu.list[[sn]]

  sce <- SingleCellExperiment::SingleCellExperiment(
    list(counts = get_counts_layer(sample_obj, assay = "RNA"))
  )
  sce <- celda::decontX(
    sce,
    z = as.factor(sample_obj$seurat_clusters),
    verbose = FALSE
  )

  # Named numeric vector: contamination estimate keyed by cell name.
  stats::setNames(
    as.numeric(SummarizedExperiment::colData(sce)$decontX_contamination),
    colnames(sce)
  )
})

decontX_contamination <- unlist(contam_list, use.names = TRUE)

# Align to the cell order of scobj; cells without an estimate stay NA.
contam_full <- stats::setNames(rep(NA_real_, ncol(scobj)), colnames(scobj))
contam_full[names(decontX_contamination)] <- decontX_contamination

scobj$decontX_contamination <- contam_full

rm(seu.list, clusters, contam_list, decontX_contamination, contam_full, get_counts_layer)

In [None]:
## Intron fraction (read from the STARsolo velocyto output)
solo_velo_dir <- "/home/data/tanglei/project/prostate_altas/data/solo_velocyto"
# Sample names are the immediate sub-directory names of solo_velo_dir.
velo_samples <- list.dirs(
  solo_velo_dir,
  full.names = FALSE,
  recursive = FALSE
)

# Read one velocyto count matrix of one sample as a sparse column matrix.
#
# sample_id: sub-directory name of the sample under `base_dir`.
# mat_name:  matrix file name without the ".mtx" extension.
# base_dir:  directory that contains the per-sample sub-directories.
#
# Reads <base_dir>/<sample_id>/filtered/<mat_name>.mtx together with
# features.tsv and barcodes.tsv from the same directory. Row names are the
# (made unique) gene names; column names are "<sample tag>_<barcode>", where
# the sample tag is `sample_id` with "_" replaced by "-".
# Stops with an explicit message when the feature or barcode count does not
# match the matrix dimensions.
read_velo_mat <- function(sample_id, mat_name, base_dir) {
  d <- file.path(base_dir, sample_id, "filtered")
  m <- Matrix::readMM(file.path(d, paste0(mat_name, ".mtx")))
  # Direct coercion to "dgCMatrix" is deprecated since Matrix 1.5-0; going
  # through the virtual classes is the documented replacement.
  m <- methods::as(
    methods::as(methods::as(m, "dMatrix"), "generalMatrix"),
    "CsparseMatrix"
  )

  features <- data.table::fread(file.path(d, "features.tsv"), header = FALSE)
  barcodes <- data.table::fread(file.path(d, "barcodes.tsv"), header = FALSE)$V1

  # Use the second column as gene name when present, otherwise the first.
  gene_names <- if (ncol(features) >= 2) features[[2]] else features[[1]]

  if (length(gene_names) != nrow(m)) {
    stop(
      "features.tsv has ", length(gene_names), " entries but ", mat_name,
      ".mtx has ", nrow(m), " rows (sample ", sample_id, ")",
      call. = FALSE
    )
  }
  if (length(barcodes) != ncol(m)) {
    stop(
      "barcodes.tsv has ", length(barcodes), " entries but ", mat_name,
      ".mtx has ", ncol(m), " columns (sample ", sample_id, ")",
      call. = FALSE
    )
  }

  rownames(m) <- make.unique(as.character(gene_names))

  sample_tag <- gsub("_", "-", sample_id)
  colnames(m) <- paste0(sample_tag, "_", as.character(barcodes))
  m
}

if (length(velo_samples) == 0) {
  stop("No samples found under solo_velocyto directory")
}

# One matrix per sample for each of the two count types.
spliced_list <- lapply(
  velo_samples, read_velo_mat,
  mat_name = "spliced", base_dir = solo_velo_dir
)
unspliced_list <- lapply(
  velo_samples, read_velo_mat,
  mat_name = "unspliced", base_dir = solo_velo_dir
)

# Column-bind the samples on the genes shared by every matrix.
common_genes <- Reduce(
  intersect,
  c(lapply(spliced_list, rownames), lapply(unspliced_list, rownames))
)

spliced_all <- do.call(
  cbind,
  lapply(spliced_list, function(mat) mat[common_genes, , drop = FALSE])
)
unspliced_all <- do.call(
  cbind,
  lapply(unspliced_list, function(mat) mat[common_genes, , drop = FALSE])
)

# Align with the Seurat cell names.
common_cells <- Reduce(
  intersect,
  list(colnames(scobj), colnames(spliced_all), colnames(unspliced_all))
)
missing_cells <- setdiff(colnames(scobj), common_cells)

emat <- spliced_all[, common_cells, drop = FALSE]
nmat <- unspliced_all[, common_cells, drop = FALSE]

e_sum <- Matrix::colSums(emat)
n_sum <- Matrix::colSums(nmat)
denom <- e_sum + n_sum

# Computed only for cells matched in the velocyto output; unmatched cells
# default to 0 (a message reports how many).
percent.intron <- stats::setNames(numeric(ncol(scobj)), colnames(scobj))
# unspliced / (spliced + unspliced); cells with a zero total get 0.
ratio_common <- n_sum / denom
ratio_common[!(denom > 0)] <- 0
percent.intron[common_cells] <- ratio_common

message("Matched cells for intron ratio: ", length(common_cells),
        "; missing in velocyto: ", length(missing_cells))
if (length(missing_cells) > 0) {
  message("Example missing cells: ", paste(head(missing_cells, 5), collapse = ", "))
}

scobj$percent.intron <- percent.intron

rm(solo_velo_dir, velo_samples, spliced_list, unspliced_list, common_genes,
   spliced_all, unspliced_all, common_cells, emat, nmat, e_sum, n_sum,
   denom, percent.intron, ratio_common, read_velo_mat)

In [None]:
## Derive a QC score from the QC matrix
## QC matrix
# Print the number of cells per sample, then work on a single sample.
table(scobj@meta.data[["orig.ident"]])
sample <- "GSM4773521"
seu1 <- subset(scobj, subset = orig.ident == sample)

metadata <- seu1@meta.data
# Metadata columns that make up the QC matrix.
qc.names <- c(
  "nCount_RNA", "nFeature_RNA", "percent.mt", "percent.ribo",
  "percent.intron", "percent.ery", "scDblFinder.score",
  "decontX_contamination", "qc_score", "percent.stress"
)

qc.mat <- metadata[, qc.names, drop = FALSE]

In [None]:
## Min-max scaling
# Force every QC column to numeric and restore the cell names as row names.
qc.mat <- as.data.frame(lapply(qc.mat, as.numeric))
rownames(qc.mat) <- rownames(metadata)

# Scale each column to [0, 1]. lapply() is used instead of apply() so the
# data frame is not coerced to a matrix first.
qc.minmax <- as.data.frame(
  lapply(qc.mat, function(x) {
    rng <- range(x, na.rm = TRUE)
    span <- rng[2] - rng[1]
    if (!is.finite(span)) {
      # Column without any finite value: nothing to scale.
      return(x)
    }
    if (span == 0) {
      # Constant column: 0 / 0 would give NaN for every cell, which the
      # nearest-neighbour search cannot handle. Map it to 0, keeping NAs.
      return(x - rng[1])
    }
    (x - rng[1]) / span
  })
)
rownames(qc.minmax) <- rownames(qc.mat)
summary(qc.minmax)

In [None]:
## Pairwise distances
# Histogram of distances on a random subset of at most 2000 cells. The size
# is capped at the number of rows because sampling more rows than exist
# (without replacement) is an error. `base::sample` is spelled out because a
# character variable named `sample` is also defined in this script.
qc_sub_idx <- base::sample(
  seq_len(nrow(qc.minmax)),
  size = min(2000L, nrow(qc.minmax))
)
dist.mat <- dist(qc.minmax[qc_sub_idx, , drop = FALSE])
hist(dist.mat)

In [34]:
## kNN graph
# 20 nearest neighbours of every cell in the scaled QC space.
nn <- RANN::nn2(data = qc.minmax, k = 20)
# Drop the first neighbour column, i.e. the cell itself.
nn.idx <- nn[["nn.idx"]][, -1]

In [35]:
# Build the edge table (from-to)
# Vectorised construction: row i of nn.idx contributes the pairs
# (i, nn.idx[i, 1]), (i, nn.idx[i, 2]), ... in that order, which is the same
# order a row-by-row rbind would produce, without the per-row overhead.
edges <- cbind(
  from = rep(seq_len(nrow(nn.idx)), each = ncol(nn.idx)),
  to = as.vector(t(nn.idx))
)
# Undirected de-duplication: put the smaller index first in every pair, then
# keep the first occurrence of each pair.
edges <- unique(
  cbind(
    pmin(edges[, "from"], edges[, "to"]),
    pmax(edges[, "from"], edges[, "to"])
  )
)

In [None]:
# Build the undirected graph with igraph
library(igraph)
g <- graph_from_edgelist(el = edges, directed = FALSE)

In [None]:
# Louvain clustering
set.seed(123)
louvain.res <- cluster_louvain(graph = g, resolution = 0.9)

# Attach the cluster membership to a fresh copy of the sample metadata.
metadata <- seu1@meta.data
metadata[["louvain"]] <- factor(membership(louvain.res))

# Fraction of cells in each cluster.
prop.table(table(metadata[["louvain"]]))

In [None]:
library(ggthemes)
library(viridis)
# Reshape the QC columns to long format: one row per cell and metric.
# all_of() is the supported way to select with an external character vector
# (a bare vector is deprecated in tidyselect). "qc_score" is already part of
# qc.names; it is listed first and de-duplicated so the column order of the
# selection stays: qc_score, then the remaining metrics.
metadata_long <- metadata %>%
  pivot_longer(
    cols = all_of(unique(c("qc_score", qc.names))),
    names_to = "metric",
    values_to = "value"
  )

In [None]:
# Violin + box plot of every QC metric per Louvain cluster, one facet per
# metric with its own y scale.
ggplot(metadata_long, aes(factor(louvain), value, fill = factor(louvain))) + 
  geom_violin(
    scale = "width", 
    color = "black", 
    alpha = 0.7
  ) +
  geom_boxplot(
    width = 0.2, 
    fill = "white", 
    color = "black",
    outlier.shape = NA
  ) +
  facet_wrap(~metric, scales = "free_y", ncol = 1) +
  theme_minimal() +
  theme(
    strip.text = element_text(face = "bold", size = 15),  
    panel.grid.major.x = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated since ggplot2 3.4.0.
    axis.line = element_line(color = "black", linewidth = 0.5),
    axis.text = element_text(color = "black", size = 12),
    axis.title = element_text(color = "black", face = "bold", size = 12),
    plot.title = element_text(hjust = 0.5, face = "bold", size = 20) 
  ) +
  labs(
    x = "Louvain Cluster",
    y = "Value",
    title = "QC Metrics Distribution Across Clusters"
  ) +
  scale_y_continuous(expand = c(0.02, 0)) +
  scale_fill_viridis(discrete = TRUE)

In [None]:
# Write the metadata (now including the louvain column) back to the object,
# in the object's own cell order.
metadata <- metadata[rownames(seu1@meta.data), ]
seu1@meta.data <- metadata

## Visualization
# The steps below read a reduction called "qc", but nothing visible in this
# script creates it. When it is absent, build it from the min-max scaled QC
# matrix so the cell can run.
# NOTE(review): assumes "qc" is meant to hold the scaled QC metrics that the
# kNN/Louvain step used - confirm against the original analysis.
qc_dims <- 1:9
if (!"qc" %in% Reductions(seu1)) {
  qc_embed <- as.matrix(qc.minmax[colnames(seu1), , drop = FALSE])
  colnames(qc_embed) <- paste0("QC_", seq_len(ncol(qc_embed)))
  seu1[["qc"]] <- CreateDimReducObject(
    embeddings = qc_embed,
    key = "QC_",
    assay = DefaultAssay(seu1)
  )
  # Use every metric of the reduction created here; an existing "qc"
  # reduction keeps the original dims = 1:9.
  qc_dims <- seq_len(ncol(qc_embed))
}

seu1 <- RunUMAP(seu1, reduction = "qc", dims = qc_dims, reduction.name = "umap_qc")
seu1 <- FindNeighbors(seu1, reduction = "qc", dims = qc_dims, k.param = 20)
seu1 <- FindClusters(seu1, resolution = 0.6, cluster.name = "qc_cluster")
DimPlot(seu1, reduction = "umap_qc", label = TRUE, group.by = "louvain")

In [None]:
## Label
Idents(seu1) <- "louvain"

# Manually curated QC state of every Louvain cluster of this sample.
qc_state_by_cluster <- c(
  "1" = "Low",
  "2" = "Low",
  "3" = "High",
  "4" = "Low",
  "5" = "Low",
  "6" = "High",
  "7" = "Doubt",
  "8" = "Low",
  "9" = "Low",
  "10" = "Low",
  "11" = "Low"
)

# Vectorised lookup instead of a per-cell loop over the metadata slot.
# Clusters without an entry in the table get NA and are reported, rather than
# silently receiving whatever value the column was first filled with.
louvain_ids <- as.character(seu1@meta.data$louvain)
seu1@meta.data$final_state_GSE <- unname(qc_state_by_cluster[louvain_ids])

unlabelled_clusters <- setdiff(unique(louvain_ids), names(qc_state_by_cluster))
if (length(unlabelled_clusters) > 0) {
  warning(
    "Louvain clusters without a QC label (set to NA): ",
    paste(unlabelled_clusters, collapse = ", "),
    call. = FALSE
  )
}

DimPlot(seu1, reduction = "umap_qc", label = TRUE, group.by = "final_state_GSE")
# Save the per-sample metadata, including the QC label, as <sample>_QC.csv.
metadata <- seu1@meta.data
write.csv(metadata, glue("/home/data/tanglei/project/prostate_altas/output/{sample}_QC.csv"))

In [None]:
## Merge data
# Start with an empty list that will hold the data frames read from disk
meta_list <- list()

# Collect all CSV files in the output directory. `pattern` is a regular
# expression, not a glob: "\\.csv$" matches only names ending in ".csv",
# whereas "*.csv" also matches names that merely contain "csv".
csv_files <- list.files(
  "/home/data/tanglei/project/prostate_altas/output",
  pattern = "\\.csv$",
  full.names = TRUE
)

In [None]:
# Read every CSV file and store it in the list under its file path
meta_list[csv_files] <- lapply(csv_files, function(file) {
  tmp <- fread(file, data.table = FALSE)  # read the CSV file
  rownames(tmp) <- tmp[[1]]               # first column holds the row names
  # Remove the row-name column; drop = FALSE keeps a data frame even when
  # only one column is left.
  tmp[, -1, drop = FALSE]
})

In [None]:
# Combine the per-sample QC labels and attach them to the merged object.
# Files that do not carry a final_state_GSE column are skipped with a warning
# instead of aborting the whole merge.
has_state <- vapply(
  meta_list,
  function(df) "final_state_GSE" %in% colnames(df),
  logical(1)
)
if (any(!has_state)) {
  warning(
    "Skipping files without a final_state_GSE column: ",
    paste(names(meta_list)[!has_state], collapse = ", "),
    call. = FALSE
  )
}
meta_list <- lapply(meta_list[has_state], function(df) {
  df[, "final_state_GSE", drop = FALSE]
})
if (length(meta_list) == 0) {
  stop("No QC file with a final_state_GSE column was found", call. = FALSE)
}

# Build an explicit cell-name -> label lookup so the alignment below does not
# depend on row names surviving a row bind.
qc_state <- unlist(
  lapply(unname(meta_list), function(df) {
    stats::setNames(as.character(df[["final_state_GSE"]]), rownames(df))
  })
)

metadata <- scobj@meta.data
# One row per cell of scobj, in scobj order; cells without a label get NA.
qc <- data.frame(
  final_state_GSE = unname(qc_state[rownames(metadata)]),
  row.names = rownames(metadata)
)
# Assign by column name (not cbind) so re-running this cell overwrites the
# column instead of adding a duplicate.
metadata$final_state_GSE <- qc[["final_state_GSE"]]
scobj@meta.data <- metadata

In [None]:
## Merge sample-level metadata
meta <- data.table::fread("/home/data/tanglei/project/prostate_altas/data/metadata.csv")
metadata <- scobj@meta.data
metadata$sample.ID <- metadata$orig.ident

# Sample-level columns copied onto every cell, in this order.
sample_meta_cols <- c(
  "GSE.ID", "celltype_article", "celltype_article_H1",
  "celltype_article_H2", "celltype_article_H3", "type", "sample_from_met",
  "stage", "age", "PSA", "Gleason", "type_zone", "type_composition",
  "Prostate_volume", "Prostate_volume_state"
)
absent_cols <- setdiff(c("sample.ID", sample_meta_cols), colnames(meta))
if (length(absent_cols) > 0) {
  stop(
    "metadata.csv lacks required columns: ",
    paste(absent_cols, collapse = ", "),
    call. = FALSE
  )
}

# Row of `meta` for every cell, found through the cell's sample ID. Cells
# whose sample is not listed get NA in every copied column (instead of
# keeping the sample ID as the value), and each column keeps the type it has
# in metadata.csv.
sample_ids <- as.character(metadata$orig.ident)
meta_row <- match(sample_ids, as.character(meta$sample.ID))
unmatched_samples <- unique(sample_ids[is.na(meta_row)])
if (length(unmatched_samples) > 0) {
  warning(
    "Samples not found in metadata.csv (their fields are set to NA): ",
    paste(unmatched_samples, collapse = ", "),
    call. = FALSE
  )
}

for (meta_col in sample_meta_cols) {
  metadata[[meta_col]] <- meta[[meta_col]][meta_row]
}

scobj@meta.data <- metadata

In [None]:
# Save the annotated object to disk.
qs::qsave(
  x = scobj,
  file = "/home/data/tanglei/project/prostate_altas/output/Seurat_QC.qs"
)