# In [None]:
library(Seurat)
library(stringr)

# Run identifiers: the task ID and the sample label used in output file names.
task <- "W202501040001388"
sample <- "TM2_test"

# Output directory for this sample/task. It is created up front (including any
# missing parent directories); warnings such as "already exists" are silenced.
path <- paste0("/data/work/output/DoubletFinder/", sample, "/", task, "/")
dir.create(path, showWarnings = FALSE, recursive = TRUE)

# Read the filtered count matrix; gene.column = 1 selects the first column of
# the features file as the feature names.
file <- "/data/input/W202501040001388/scRNA-seq_v3.1.5/04.Matrix/FilterMatrix"
scrna_data <- Read10X(data.dir = file, gene.column = 1)

# Build the Seurat object with the min.cells / min.features thresholds below.
# No trailing comma after the last argument: a trailing comma passes an extra
# empty argument to the call.
seob <- CreateSeuratObject(
  counts = scrna_data,
  project = task,
  min.cells = 3,
  min.features = 200
)

# Tag every cell with the sample label; the QC plots later group by "sample".
seob[["sample"]] <- sample

# Checkpoint the freshly created object.
# NOTE(review): the file name carries a "SoupX_" prefix although no SoupX step
# is visible in this script -- confirm the prefix is intentional.
save(seob, file = str_c(path, "SoupX_", sample, ".rdata"))

library(tidyverse)
library(patchwork)
library(DoubletFinder)
# Restore `seob` from the checkpoint written right after object creation.
load(file = str_c(path, "SoupX_", sample, ".rdata"))

# Cell pre-filter: keep cells with more than 200 detected features AND more
# than 500 counts.
seob <- subset(seob, subset = nFeature_RNA > 200 & nCount_RNA > 500)

# Normalisation, variable-feature selection and scaling with default settings.
seob <- NormalizeData(seob)
seob <- FindVariableFeatures(seob)
seob <- ScaleData(seob)

# PCA, then UMAP / neighbour graph on the first 30 PCs, followed by a coarse
# clustering (resolution 0.1). The resulting seurat_clusters are used further
# down as the annotation for the homotypic-doublet estimate.
seob <- RunPCA(seob)
seob <- RunUMAP(seob, dims = 1:30)
seob <- FindNeighbors(seob, dims = 1:30)
seob <- FindClusters(seob, resolution = 0.1)

# UMAP coloured by the coarse clusters, written to a PDF.
# The plot is built before the device is opened, so a failure while building
# it cannot leave a PDF device open. It is then print()ed explicitly: ggplot
# objects are only drawn automatically at top level, so without print() the
# PDF would be empty when this script is run through source() or wrapped in a
# function or loop.
umap_cluster_plot <- DimPlot(
  seob,
  reduction = "umap",
  group.by = "seurat_clusters"
)

pdf(str_c(path, "/umap_DoubletFinder_", sample, ".pdf"), height = 8, width = 10)
print(umap_cluster_plot)
dev.off()


# DoubletFinder parameter sweep over the first 30 PCs, summarised without
# ground-truth labels (GT = FALSE), then reduced to the per-pK BCmetric table.
# NOTE(review): the *_v3 function names exist only in older DoubletFinder
# releases (newer ones appear to drop the suffix) -- confirm the installed
# version.
sweep.res.list <- paramSweep_v3(seob, PCs = 1:30, sct = FALSE)
sweep.stats <- summarizeSweep(sweep.res.list, GT = FALSE)
bcmvn <- find.pK(sweep.stats)

# Select the pK with the largest BCmetric (first one on ties). The value goes
# through as.character() before as.numeric() so that, if pK is a factor, the
# label is recovered rather than the internal level code.
best_row <- order(-bcmvn$BCmetric)[1]
pk_best <- as.numeric(as.character(bcmvn$pK[best_row]))


# BCmetric as a function of pK, written to a PDF.
# The plot is built first and print()ed explicitly: ggplot objects are only
# drawn automatically at top level, so without print() the PDF would be empty
# when the script is run through source() or wrapped in a function or loop.
bcmvn_plot <- ggplot(bcmvn, aes(x = pK, y = BCmetric, group = 1)) +
  geom_point() +
  geom_line()

pdf(str_c(path, "/bcmvn_DoubletFinder_", sample, ".pdf"), height = 8, width = 12)
print(bcmvn_plot)
dev.off()


# Estimate the homotypic-doublet proportion from the coarse cluster labels.
annotations <- seob$seurat_clusters
homotypic.prop <- modelHomotypic(annotations)

# Expected number of doublets: 7% of the cells currently in the object, plus
# a version adjusted for the homotypic proportion.
# NOTE(review): nExp_poi.adj is computed but never passed to DoubletFinder
# below; the classification uses the unadjusted nExp_poi. Confirm this is the
# intended stringency.
nExp_poi <- round(0.07 * nrow(seob@meta.data))
nExp_poi.adj <- round(nExp_poi * (1 - homotypic.prop))

# Classify cells using the selected pK; results are added to the metadata.
seob <- doubletFinder_v3(
  seob,
  PCs = 1:30,
  pN = 0.25,
  pK = pk_best,
  nExp = nExp_poi,
  reuse.pANN = FALSE,
  sct = FALSE
)

# Show the first metadata rows to inspect the columns added by DoubletFinder.
seob@meta.data[1:4, ]

# Find the DoubletFinder classification column by name rather than by a fixed
# position: a hard-coded index silently points at the wrong column as soon as
# the metadata gains or loses a column upstream. If several classification
# columns exist, the most recently added (last) one is used.
df_class_cols <- grep(
  "^DF\\.classifications",
  colnames(seob@meta.data),
  value = TRUE
)
if (length(df_class_cols) == 0) {
  stop("No 'DF.classifications' column found in the metadata.", call. = FALSE)
}
name <- df_class_cols[length(df_class_cols)]

# Two-page PDF: UMAP and count/feature violins, both grouped by the
# DoubletFinder classification column stored in `name`.
# Plots are built first and print()ed explicitly: ggplot objects are only
# drawn automatically at top level, so without print() the PDF would be empty
# when the script is run through source() or wrapped in a function or loop.
doublet_umap <- DimPlot(seob, reduction = "umap", group.by = name)
doublet_vln <- VlnPlot(
  seob,
  group.by = name,
  features = c("nCount_RNA", "nFeature_RNA"),
  pt.size = 0,
  ncol = 2
)

pdf(
  str_c(path, "/Doubletcounts_DoubletFinder_", sample, ".pdf"),
  height = 10,
  width = 12
)
print(doublet_umap)
print(doublet_vln)
dev.off()


# Keep only the cells classified as "Singlet" in the column named by `name`.
is_singlet <- seob@meta.data[[name]] == "Singlet"
seob <- seob[, is_singlet]

# Percentage of counts from features whose names start with "MT" / "CP".
# NOTE(review): presumably these prefixes mark mitochondrial and chloroplast
# genes in this annotation -- verify against the actual feature names.
seob[["percent.mt"]] <- PercentageFeatureSet(seob, pattern = "^MT")
seob[["percent.cp"]] <- PercentageFeatureSet(seob, pattern = "^CP")

library(readr)
# Cell-cycle marker tables (comma-separated, with a header row).
ccgenes_s <- read.table(
  "/data/work/input/cell_cycles_ITAG4.1_S_241212.csv",
  header = TRUE,
  sep = ","
)
ccgenes_g2m <- read.table(
  "/data/work/input/cell_cycles_ITAG4.1_G2M_241212.csv",
  header = TRUE,
  sep = ","
)

# Replace the S / G2M entries of `cc.genes` with the IDs from the Gene_SL4.0
# column of each table.
# NOTE(review): `cc.genes` is not defined in this script; presumably it is the
# marker list shipped with Seurat -- confirm.
cc.genes$s.genes <- ccgenes_s$Gene_SL4.0
cc.genes$g2m.genes <- ccgenes_g2m$Gene_SL4.0

# Score each cell for S and G2M phase with the custom marker sets.
seob <- CellCycleScoring(
  seob,
  s.features = cc.genes$s.genes,
  g2m.features = cc.genes$g2m.genes
)

# Violin plots (log scale) of detected features and counts, grouped by sample.
# The plot is built first and print()ed explicitly: ggplot objects are only
# drawn automatically at top level, so without print() the PDF would be empty
# when the script is run through source() or wrapped in a function or loop.
qc_violin <- VlnPlot(
  seob,
  features = c("nFeature_RNA", "nCount_RNA"),
  group.by = "sample",
  log = TRUE,
  pt.size = 0.1
)

pdf(str_c(path, "/VlnPlot_", sample, ".pdf"), height = 8, width = 12)
print(qc_violin)
dev.off()

### Ridge plot

# Ridge plots (log scale, one column) of detected features and counts,
# grouped by sample.
# The plot is built first and print()ed explicitly: ggplot objects are only
# drawn automatically at top level, so without print() the PDF would be empty
# when the script is run through source() or wrapped in a function or loop.
qc_ridge <- RidgePlot(
  object = seob,
  features = c("nFeature_RNA", "nCount_RNA"),
  log = TRUE,
  ncol = 1,
  group.by = "sample"
)

pdf(str_c(path, "/RidgePlot_", sample, ".pdf"), height = 4, width = 8)
print(qc_ridge)
dev.off()

### Scatter plots

# nCount_RNA against nFeature_RNA, percent.mt and percent.cp, grouped by
# sample and laid out side by side.
p1 <- FeatureScatter(
  seob,
  feature1 = "nCount_RNA",
  feature2 = "nFeature_RNA",
  group.by = "sample"
)
p2 <- FeatureScatter(
  seob,
  feature1 = "nCount_RNA",
  feature2 = "percent.mt",
  group.by = "sample"
)
p3 <- FeatureScatter(
  seob,
  feature1 = "nCount_RNA",
  feature2 = "percent.cp",
  group.by = "sample"
)

# The combined plot is print()ed explicitly: ggplot objects are only drawn
# automatically at top level, so without print() the PDF would be empty when
# the script is run through source() or wrapped in a function or loop.
pdf(str_c(path, "/FeatureScatter_", sample, ".pdf"), height = 4, width = 12)
print(p1 + p2 + p3)
dev.off()

# Drop cells where percent.mt or percent.cp reaches 1 (i.e. keep cells with
# both values below 1).
seob <- subset(seob, subset = percent.mt < 1 & percent.cp < 1)

# Checkpoint the filtered singlet object.
save(seob, file = paste0(path, "/Singlet_", sample, ".rdata"))

# SCTransform normalisation with 3000 variable features, then PCA.
seob <- SCTransform(
  seob,
  variable.features.n = 3000,
  verbose = FALSE
)
seob <- RunPCA(seob)

# Elbow plot over the first 50 PCs. It is print()ed explicitly so it is still
# drawn when the script is run through source() (ggplot objects are only
# drawn automatically at top level).
print(ElbowPlot(seob, ndims = 50))

# t-SNE and UMAP embeddings on the first 30 PCs.
seob <- RunTSNE(seob, dims = 1:30)
seob <- RunUMAP(seob, dims = 1:30)


# Neighbour graph on the first 30 PCs with k.param = 20.
seob <- FindNeighbors(
  seob,
  k.param = 20,
  dims = 1:30
)

# Clustering resolution; `r` is also embedded in the output file names below.
r <- 0.3

# Cluster with a fixed random seed. No trailing comma after the last
# argument: a trailing comma passes an extra empty argument to the call.
seob <- FindClusters(
  seob,
  resolution = r,
  random.seed = 1
)


# Labelled cluster plots on the PCA, t-SNE and UMAP embeddings, side by side.
p1 <- DimPlot(
  seob,
  reduction = "pca",
  group.by = "seurat_clusters",
  label = TRUE
)
p2 <- DimPlot(
  seob,
  reduction = "tsne",
  group.by = "seurat_clusters",
  label = TRUE
)
p3 <- DimPlot(
  seob,
  reduction = "umap",
  group.by = "seurat_clusters",
  label = TRUE
)

# The combined plot is print()ed explicitly: ggplot objects are only drawn
# automatically at top level, so without print() the PDF would be empty when
# the script is run through source() or wrapped in a function or loop.
pdf(str_c(path, "/Cluster_", sample, "_r", r, ".pdf"), height = 8, width = 24)
print(p1 + p2 + p3)
dev.off()

# Persist the clustered object twice under the same base name: as an .rdata
# workspace file (restores the name `seob` on load) and as a single-object
# .rds file.
cluster_prefix <- str_c(path, "/Cluster_r", r, "_", sample)
save(seob, file = str_c(cluster_prefix, ".rdata"))
saveRDS(seob, file = str_c(cluster_prefix, ".rds"))