In [None]:
library(Seurat)
library(glue)

# Load the QC-filtered Seurat object that holds every sample.
seu_all <- qs::qread(
  "/home/data/tanglei/project/prostate_altas/output/Seurat_QC.qs"
)

# Sketching is done per sample name; the name is read from this metadata
# column, so fail early if the column is missing.
sample_var <- "sample.ID"
meta_cols <- colnames(seu_all[[]])
if (!is.element(sample_var, meta_cols)) {
  stop(glue("sample_var '{sample_var}' not found in metadata."))
}

# Make RNA the default assay whenever the object has one.
if (is.element("RNA", Assays(seu_all))) {
  DefaultAssay(seu_all) <- "RNA"
}

## subsample by sample

#' Sketch (downsample) the cells of a single sample.
#'
#' Cells whose `sample_var` metadata value equals `sample_id` are subset from
#' `seu_all`, normalized, given variable features and then passed to
#' `SketchData()` (leverage-score method), which stores the result in a new
#' "sketch" assay. The "RNA" assay, if present, is removed from the result.
#'
#' @param sample_id Sample name to look up in the `sample_var` column.
#' @param seu_all Seurat object containing all samples.
#' @param sample_var Name of the metadata column that holds sample names.
#' @param min_cells Samples with at most this many cells are not reduced;
#'   larger samples are never sketched below this number of cells.
#' @param fraction Fraction of a sample's cells requested from the sketch
#'   (a single number in (0, 1]). Defaults to 0.2, the value that used to be
#'   hard-coded.
#' @return A Seurat object whose default assay is "sketch", or `NULL`
#'   (invisibly, with a warning) when no cell matches `sample_id`.
sc_sketch <- function(sample_id, seu_all, sample_var, min_cells = 2000,
                      fraction = 0.2) {
  stopifnot(
    is.numeric(fraction),
    length(fraction) == 1L,
    !is.na(fraction),
    fraction > 0,
    fraction <= 1
  )

  # Comparing against NA metadata yields NA; treat those cells as non-matching.
  cell_idx <- seu_all[[]][[sample_var]] == sample_id
  cell_idx[is.na(cell_idx)] <- FALSE

  if (!any(cell_idx)) {
    warning(glue("Sample {sample_id} not found in metadata column {sample_var}, skip."))
    return(invisible(NULL))
  }

  # Subset by cell name directly, so nothing depends on how barcodes relate
  # to the sample name.
  seu <- subset(seu_all, cells = colnames(seu_all)[cell_idx])

  current_cells <- ncol(seu)

  if (current_cells <= min_cells) {
    # Small sample: request every cell from SketchData().
    message(glue("Sample {sample_id} has {current_cells} cells, less than or equal to minimum {min_cells} cells."))
    message(glue("Will use all {current_cells} cells without sketching"))
    sketched.cells <- current_cells
  } else {
    # Large sample: keep `fraction` of the cells, but never fewer than
    # `min_cells`.
    sketched.cells <- max(ceiling(current_cells * fraction), min_cells)
    pct <- round(fraction * 100, 2)
    message(glue("Sample {sample_id} has {current_cells} cells."))
    message(glue("Will sketch to {sketched.cells} cells ({pct}% of total or {min_cells} cells, whichever is larger)"))
  }

  # The data is normalized and variable features are selected on this sample
  # alone, right before SketchData() is called.
  seu <- NormalizeData(seu)
  seu <- FindVariableFeatures(seu, verbose = FALSE)
  seu <- SketchData(
    object = seu,
    ncells = sketched.cells,
    method = "LeverageScore",
    sketched.assay = "sketch"
  )

  # Keep only the sketch assay in the returned object.
  DefaultAssay(seu) <- "sketch"
  if ("RNA" %in% Assays(seu)) {
    seu[["RNA"]] <- NULL
  }

  seu
}


## Sketch every sample found in the `sample_var` column. Results are kept in
## memory only; nothing is written to disk here.
sample_ids <- unique(as.character(seu_all[[]][[sample_var]]))
sample_ids <- sample_ids[!is.na(sample_ids) & nzchar(sample_ids)]

sketch_list <- lapply(seq_along(sample_ids), function(i) {
  sample_id <- sample_ids[[i]]
  print(glue("[{i}/{length(sample_ids)}] Processing sample ({sample_var}): {sample_id}..."))
  sc_sketch(sample_id = sample_id, seu_all = seu_all, sample_var = sample_var)
})
names(sketch_list) <- sample_ids

# sc_sketch() returns NULL for a sample it skipped. Drop those entries
# explicitly instead of relying on `x[[name]] <- NULL` deleting the element.
sketch_list <- Filter(Negate(is.null), sketch_list)

# Fail here with a clear message rather than later with a
# "subscript out of bounds" error when the list is indexed.
if (length(sketch_list) == 0L) {
  stop(
    glue("No sample could be sketched from metadata column '{sample_var}'."),
    call. = FALSE
  )
}

In [None]:
## Merge the per-sample sketches into one object, then run the standard
## normalize -> variable features -> scale -> PCA steps on it.
## merge() is only called when there is more than one sketch, so `y` is never
## an empty list.
seu <- if (length(sketch_list) > 1L) {
  merge(x = sketch_list[[1]], y = sketch_list[-1])
} else {
  sketch_list[[1]]
}

# The per-sample list is no longer needed; release it before the heavy steps.
rm(sketch_list)
gc()

seu <- NormalizeData(seu)
# Spell out FALSE: `F` is an ordinary variable that can be reassigned.
seu <- FindVariableFeatures(seu, verbose = FALSE)
seu <- ScaleData(seu, verbose = FALSE)
# Show which layers the merged object carries.
Layers(seu)
seu <- RunPCA(seu, verbose = FALSE)

In [11]:
# Write the merged, PCA-reduced sketch object to disk.
qs::qsave(
  x = seu,
  file = "/home/data/tanglei/project/prostate_altas/output/05/sketch.qs"
)