Scissor R 包的 GitHub 仓库：https://github.com/sunduanchen/Scissor

In [None]:
# Installation (one-time setup; uncomment the next line to run it).
# devtools::install_github('sunduanchen/Scissor')

## 不同细胞类型的Scissor，只要修改celltype即可

In [None]:
celltype <- "Macrophages"  # Cell type to analyse; it is spliced into the input path and all output file names below. Change it to process another cell type.

In [None]:
library(Seurat)
library(preprocessCore)
# library(scAB)
library(Scissor)
library(qs)
library(patchwork)

In [None]:
# Bulk expression table: tab-separated text with a header row.
bulk_df <- read.table(
  "../1_bulk_data/Bulk_data.txt",
  sep = "\t",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Phenotype table: same format; its first column is used as the row names.
Phenotype_df <- read.table(
  "../1_bulk_data/Phenotype.txt",
  sep = "\t",
  header = TRUE,
  row.names = 1,
  stringsAsFactors = FALSE
)

In [None]:
# Assumes bulk_df has already been read (see the previous cell).
# Move the gene symbols from the GeneSymbol column into the row names so that
# only sample columns remain in bulk_df.
#
# Guard: if the column is absent (e.g. this cell is executed a second time),
# `bulk_df$GeneSymbol` would be NULL and assigning NULL row names silently
# resets them to 1..n, wiping the gene names. Fail loudly instead.
if (!"GeneSymbol" %in% colnames(bulk_df)) {
  stop("bulk_df has no 'GeneSymbol' column; gene symbols cannot be set as row names ",
       "(has this cell already been run?).")
}
rownames(bulk_df) <- bulk_df[["GeneSymbol"]]  # `[[` avoids the partial matching done by `$`
bulk_df[["GeneSymbol"]] <- NULL               # drop the now-redundant column

In [None]:
# The sample names must agree: Scissor() pairs the columns of the bulk matrix
# with the rows of the phenotype table purely by position, so both the number
# and the order of samples have to be identical.
if (ncol(bulk_df) != nrow(Phenotype_df)) {
  stop("bulk_df has ", ncol(bulk_df), " samples but Phenotype_df has ",
       nrow(Phenotype_df), " rows.")
}
# Show how many positions agree (kept for visual inspection in the notebook).
table(colnames(bulk_df) == row.names(Phenotype_df))
# Enforce the check instead of relying on the reader to spot a FALSE count.
if (!identical(colnames(bulk_df), row.names(Phenotype_df))) {
  stop("Sample names of bulk_df (columns) and Phenotype_df (rows) differ or are in a different order.")
}

In [None]:
# Load the single-cell object stored for the selected cell type.
sc_dataset <- qread(
  paste0("/home/guoliming/Brown/ALI_CRL/ALI/", celltype, "/Outdata/Sub_annotion.qs")
)

In [None]:
# Idents(sc_dataset) <- sc_dataset@meta.data$RNA_snn_res.0.1

In [None]:
# Figure size used when the plot is rendered inline in the notebook.
options(repr.plot.width = 6, repr.plot.height = 4.5)

# UMAP of the single-cell object, grouped and labelled by the "celltype"
# metadata column.
# NOTE(review): an earlier comment here mentioned using RNA_snn_res.0.1 for
# Scissor, but the code groups by "celltype" - confirm which one is intended.
UMAP_celltype <- DimPlot(
  sc_dataset,
  reduction = "umap",
  group.by = "celltype",
  label = TRUE
)
UMAP_celltype

In [None]:
# Display the levels of the loaded object (presumably its active identity
# classes - confirm against Seurat) as a sanity check before running Scissor.
levels(sc_dataset)

### 运行Scissor

In [None]:
# Scissor: select single cells whose expression profile is associated with a
# phenotype measured on bulk samples.
#
# This is a local re-definition that masks the function of the same name from
# the attached Scissor package. `APML1()` is not defined here; it must be on
# the search path (presumably provided by the Scissor package).
#
# Arguments:
#   bulk_dataset  Bulk expression with genes in rows (row names = gene names)
#                 and samples in columns.
#   sc_dataset    Either a Seurat object (its RNA `data` slot and `RNA_snn`
#                 graph are used) or an expression matrix, in which case a
#                 temporary Seurat object is built to obtain the SNN graph.
#   phenotype     Phenotype of the bulk samples, in the same order as the
#                 columns of `bulk_dataset` (samples are paired by position):
#                 coerced with as.numeric() for "gaussian"/"binomial", or a
#                 two-column table for "cox".
#   tag           Group labels for "gaussian"/"binomial"; its length must equal
#                 the number of distinct phenotype values.
#   alpha         Candidate alpha value(s), tried in the given order. NULL uses
#                 the built-in grid 0.005 ... 0.9.
#   cutoff        The search over `alpha` stops at the first value for which
#                 the fraction of selected cells is below `cutoff`.
#   family        Regression type: "gaussian" (default), "binomial" or "cox".
#   Save_file     File to which the pre-processed inputs (X, Y, network,
#                 Expression_bulk, Expression_cell) are saved.
#   Load_file     If not NULL, the pre-processed inputs are loaded from this
#                 file and the pre-processing step is skipped.
#
# Returns a list with
#   para         list(alpha, lambda, family) of the last fitted model,
#   Coefs        per-cell regression coefficients,
#   Scissor_pos  names of cells with a positive coefficient,
#   Scissor_neg  names of cells with a negative coefficient.
Scissor <- function(bulk_dataset, sc_dataset, phenotype, tag = NULL, alpha = NULL,
                    cutoff = 0.2, family = c("gaussian", "binomial", "cox"),
                    Save_file = "Scissor_inputs.RData", Load_file = NULL) {
  # Resolve `family` to a single string. Without this the untouched default is
  # a length-3 vector and every `if (family == ...)` below fails on R >= 4.2.
  family <- match.arg(family)
  if (!is.null(alpha) && length(alpha) == 0) {
    stop("`alpha` must contain at least one value (or be NULL to use the default grid).")
  }

  library(Seurat)
  library(Matrix)
  library(preprocessCore)

  if (is.null(Load_file)) {
    common <- intersect(rownames(bulk_dataset), rownames(sc_dataset))
    if (length(common) == 0) {
      stop("There is no common genes between the given single-cell and bulk samples.")
    }

    # inherits() instead of `class(x) == "Seurat"`: class() of a base matrix
    # has length 2, which makes the comparison unusable as an `if` condition.
    if (inherits(sc_dataset, "Seurat")) {
      sc_exprs <- as.matrix(sc_dataset@assays$RNA@data)
      network <- as.matrix(sc_dataset@graphs$RNA_snn)
    } else {
      # Raw matrix input: build a throw-away Seurat object only to derive the
      # shared-nearest-neighbour graph.
      sc_exprs <- as.matrix(sc_dataset)
      Seurat_tmp <- CreateSeuratObject(sc_dataset)
      Seurat_tmp <- FindVariableFeatures(Seurat_tmp, selection.method = "vst",
                                         verbose = FALSE)
      Seurat_tmp <- ScaleData(Seurat_tmp, verbose = FALSE)
      Seurat_tmp <- RunPCA(Seurat_tmp, features = VariableFeatures(Seurat_tmp),
                           verbose = FALSE)
      Seurat_tmp <- FindNeighbors(Seurat_tmp, dims = 1:10, verbose = FALSE)
      network <- as.matrix(Seurat_tmp@graphs$RNA_snn)
    }

    # Turn the graph into a binary adjacency matrix without self-loops.
    diag(network) <- 0
    network[which(network != 0)] <- 1

    # Quantile-normalise bulk samples and cells together on the shared genes,
    # then split the result back into its bulk and single-cell parts.
    dataset0 <- cbind(bulk_dataset[common, ], sc_exprs[common, ])
    dataset1 <- normalize.quantiles(as.matrix(dataset0))
    rownames(dataset1) <- rownames(dataset0)
    colnames(dataset1) <- colnames(dataset0)
    n_bulk <- ncol(bulk_dataset)
    Expression_bulk <- dataset1[, seq_len(n_bulk), drop = FALSE]
    Expression_cell <- dataset1[, (n_bulk + 1):ncol(dataset1), drop = FALSE]

    # X: correlation of every bulk sample (rows) with every cell (columns).
    X <- cor(Expression_bulk, Expression_cell)
    quality_check <- quantile(X)
    print("|**************************************************|")
    print("Performing quality-check for the correlations")
    print("The five-number summary of correlations:")
    print(quality_check)
    print("|**************************************************|")
    if (quality_check[3] < 0.01) {
      warning("The median correlation between the single-cell and bulk samples is relatively low.")
    }

    # Build the response Y according to the regression type.
    if (family == "binomial") {
      Y <- as.numeric(phenotype)
      z <- table(Y)
      if (length(z) != length(tag)) {
        stop("The length differs between tags and phenotypes. Please check Scissor inputs and selected regression type.")
      } else {
        print(sprintf("Current phenotype contains %d %s and %d %s samples.",
                      z[1], tag[1], z[2], tag[2]))
        print("Perform logistic regression on the given phenotypes:")
      }
    } else if (family == "gaussian") {
      Y <- as.numeric(phenotype)
      z <- table(Y)
      if (length(z) != length(tag)) {
        stop("The length differs between tags and phenotypes. Please check Scissor inputs and selected regression type.")
      } else {
        tmp <- paste(z, tag)
        print(paste0("Current phenotype contains ",
                     paste(tmp[1:(length(z) - 1)], collapse = ", "),
                     ", and ", tmp[length(z)], " samples."))
        print("Perform linear regression on the given phenotypes:")
      }
    } else {
      # family == "cox"
      Y <- as.matrix(phenotype)
      if (ncol(Y) != 2) {
        stop("The size of survival data is wrong. Please check Scissor inputs and selected regression type.")
      } else {
        print("Perform cox regression on the given clinical outcomes:")
      }
    }

    save(X, Y, network, Expression_bulk, Expression_cell,
         file = Save_file)
  } else {
    # Restores X, Y, network, Expression_bulk and Expression_cell into this
    # function's environment.
    load(Load_file)
  }

  if (is.null(alpha)) {
    alpha <- c(0.005, 0.01, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5,
               0.6, 0.7, 0.8, 0.9)
  }

  for (i in seq_along(alpha)) {
    set.seed(123)  # fixed seed so every alpha is fitted reproducibly
    # First fit over 100 lambdas with cross-validation; its lambda.min is then
    # used for the final fit.
    fit0 <- APML1(X, Y, family = family, penalty = "Net",
                  alpha = alpha[i], Omega = network, nlambda = 100,
                  nfolds = min(10, nrow(X)))
    fit1 <- APML1(X, Y, family = family, penalty = "Net",
                  alpha = alpha[i], Omega = network, lambda = fit0$lambda.min)
    if (family == "binomial") {
      # Skip the first entry of Beta (presumably the intercept).
      Coefs <- as.numeric(fit1$Beta[2:(ncol(X) + 1)])
    } else {
      Coefs <- as.numeric(fit1$Beta)
    }
    Cell1 <- colnames(X)[which(Coefs > 0)]
    Cell2 <- colnames(X)[which(Coefs < 0)]
    percentage <- (length(Cell1) + length(Cell2)) / ncol(X)
    print(sprintf("alpha = %s", alpha[i]))
    print(sprintf("Scissor identified %d Scissor+ cells and %d Scissor- cells.",
                  length(Cell1), length(Cell2)))
    print(sprintf("The percentage of selected cell is: %s%%",
                  formatC(percentage * 100, format = "f", digits = 3)))
    # Stop at the first alpha that selects fewer than `cutoff` of all cells.
    if (percentage < cutoff) {
      break
    }
    cat("\n")
  }
  print("|**************************************************|")

  list(para = list(alpha = alpha[i], lambda = fit0$lambda.min,
                   family = family),
       Coefs = Coefs, Scissor_pos = Cell1, Scissor_neg = Cell2)
}

再次运行：

In [None]:
# Run Scissor with Cox regression at a single alpha value; the pre-processed
# inputs are saved to "<celltype>.RData".
infos1 <- Scissor(
  bulk_dataset = bulk_df,
  sc_dataset = sc_dataset,
  phenotype = Phenotype_df,
  alpha = 0.05,
  family = "cox",
  Save_file = paste0(celltype, ".RData")
)

In [None]:
# Build the Scissor label for every cell: start with "Neutral" everywhere and
# overwrite the cells returned as Scissor+ / Scissor-.
Scissor_select <- rep("Neutral", ncol(sc_dataset))
names(Scissor_select) <- colnames(sc_dataset)
Scissor_select[infos1$Scissor_pos] <- "Scissor+"
Scissor_select[infos1$Scissor_neg] <- "Scissor-"

# Store the labels in the Seurat object's metadata.
sc_dataset <- AddMetaData(sc_dataset, metadata = Scissor_select, col.name = "scissor")

# Fix the factor order so that the legend order is stable.
sc_dataset$scissor <- factor(sc_dataset$scissor, levels = c("Neutral", "Scissor+", "Scissor-"))

# UMAP coloured by Scissor label. The colours are named so that each class
# keeps its colour even when one class has no cells (with an unnamed vector
# the remaining classes would shift onto the wrong colours).
UMAP_scissor <- DimPlot(sc_dataset, reduction = 'umap',
                        group.by = 'scissor',
                        cols = c("Neutral" = 'grey',
                                 "Scissor+" = 'royalblue',
                                 "Scissor-" = 'indianred1'),
                        pt.size = 0.001)

# Side-by-side layout: cell-type UMAP (created in an earlier cell) on the left,
# Scissor UMAP on the right.
options(repr.plot.width = 20, repr.plot.height = 8)

# Open the PDF device only once everything that can fail has succeeded, so an
# error above does not leave a device open. Height and width are in inches.
pdf(paste0("Bulk1_DimPlot_", celltype, ".pdf"), height = 8, width = 20)
# Explicit print(): the plot is then drawn into the PDF even when this code is
# run through source(), where top-level auto-printing does not happen.
print(patchwork::wrap_plots(plots = list(UMAP_celltype, UMAP_scissor), ncol = 2))
dev.off()


In [None]:
library(gplots)

# Contingency table: Scissor label (rows) by cell type (columns).
tbl <- table(sc_dataset$scissor, sc_dataset$celltype)

pdf(paste0("Bulk1_Balloonplot_", celltype, ".pdf"), height = 10, width = 18)

# Enlarge the margins so that the labels are not clipped; the previous
# settings are kept in `op` and restored after plotting.
op <- par(mar = c(6, 8, 4, 2))

balloonplot(
  tbl,
  main = paste0("Bulk1_Balloonplot_", celltype),
  xlab = "Cell type",          # optional axis title
  ylab = "Scissor",            # optional axis title

  # Dots
  dotsize = 2,                 # maximum dot size (tune for the output device)
  dotchar = 19,
  dotcolor = "steelblue",

  # Row/column labels and margin text (size behaves like cex)
  text.size = 1.6,
  text.color = "black",

  # Counts drawn on the dots (size behaves like cex)
  label = TRUE,
  label.size = 1.4,
  label.color = "white",
  # label.digits = 0,          # number of decimals

  show.margins = FALSE
)

par(op)
dev.off()