In [None]:
"""
加测：/SGRNJ06/randd/PROJECT/scATAC/20231006_ME_sc/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/

小测：
/SGRNJ06/randd/PROJECT/scATAC/20230925_959595_H_EM_sc/A0920_4_ME_YSPB_WX_R_T7_EDTA5030
/SGRNJ06/randd/PROJECT/scATAC/20230925_959595_H_EM_sc/A0920_3_ME_YSPB_WX_D_T7_EDTA5030
"""

In [None]:
# Attach SeuratData and request installation of the "pbmcMultiome" dataset.
# NOTE(review): no later cell in this notebook appears to load this dataset
# (the analysis below reads local files instead) — confirm it is still needed.
library(SeuratData)
InstallData("pbmcMultiome")

In [None]:
# Attach the analysis packages one by one, in the listed order, silencing
# both startup messages and warnings so the notebook output stays clean.
invisible(lapply(
    c("Signac", "Seurat", "EnsDb.Mmusculus.v79", "tidyverse", "SingleR"),
    function(pkg) {
        suppressWarnings(suppressMessages(
            library(pkg, character.only = TRUE)
        ))
    }
))

In [None]:
# Load the scATAC-seq pipeline outputs (peak-by-barcode matrix, per-barcode
# metadata and fragment file) and wrap them in a Seurat object whose only
# assay is a chromatin assay named "peaks".

# All three inputs live in the same output directory, so the directory is
# spelled out once and each file path is derived from it with file.path()
# (the repeated literals previously carried a stray "//").
outs_dir <- "/SGRNJ06/randd/PROJECT/scATAC/20231006_ME_sc/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/03.atac/A0920_3_ME_YSPB_WX_D_T7_EDTA5030/outs"

counts <- Read10X_h5(filename = file.path(outs_dir, "filtered_peak_bc_matrix.h5"))

# The first CSV column is used as row names (presumably the cell barcodes,
# so rows can be matched to cells — TODO confirm against the file).
metadata <- read.csv(
  file = file.path(outs_dir, "singlecell.csv"),
  header = TRUE,
  row.names = 1
)

# Peak names are split into chromosome/start/end on ":" and "-".
# Features seen in fewer than 10 cells and cells with fewer than 200
# features are dropped when the assay is built.
chrom_assay <- CreateChromatinAssay(
  counts = counts,
  sep = c(":", "-"),
  fragments = file.path(outs_dir, "fragments.tsv.gz"),
  min.cells = 10,
  min.features = 200
)

# NOTE(review): the object keeps the vignette's "pbmc" name, but the
# annotation package used later in this notebook is a mouse one.
pbmc.atac <- CreateSeuratObject(
  counts = chrom_assay,
  assay = "peaks",
  meta.data = metadata
)

In [None]:
# Preview the first rows of the ATAC object.
head(pbmc.atac)

In [None]:
# Display the "peaks" assay stored in the ATAC object.
pbmc.atac[['peaks']]

In [None]:
# Read the serialized scRNA-seq object that is used below as the reference.
rds <- readRDS(
    file = "/SGRNJ03/randd/lims_result_rd/MultiResult/RD20081701_231103_mus_Embryo/project_batch/2023-11-07/hji1y7h7ve/father_cluster/batch_1/call-integration/RD20081701_231103_mus_Embryo.diff_PRO.rds"
)

In [None]:
# Standard per-modality processing of the RNA reference, written as a single
# pipeline: normalise -> pick variable features -> scale -> PCA -> UMAP on
# the first 30 principal components.
pbmc.rna <- rds %>%
    NormalizeData() %>%
    FindVariableFeatures() %>%
    ScaleData() %>%
    RunPCA() %>%
    RunUMAP(dims = 1:30)

In [None]:
# Draw the UMAP embedding of the RNA reference.
UMAPPlot(pbmc.rna)

In [None]:
# ATAC analysis: attach gene annotation information to the ATAC object.
# Gene models are extracted from the EnsDb.Mmusculus.v79 annotation package.
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Mmusculus.v79)

# Use UCSC-style sequence names (presumably to match the naming in the
# peak/fragment files — confirm).
seqlevelsStyle(annotations) <- "UCSC"

# Tag the ranges with the UCSC assembly identifier matching EnsDb v79
# (GRCm38 / mm10) rather than a free-text species label, so tools that look
# up the genome by name can resolve it.
genome(annotations) <- "mm10"

Annotation(pbmc.atac) <- annotations

In [None]:
# TF-IDF normalisation of the peak counts, followed by top-feature selection.
# NOTE(review): min.cutoff = "q0" presumably keeps every peak as a top
# feature — confirm against the Signac documentation.
# The first LSI dimension is excluded later (dims = 2:30 in the UMAP and
# transfer calls) as it is typically correlated with sequencing depth.
pbmc.atac <- RunTFIDF(pbmc.atac)
pbmc.atac <- FindTopFeatures(pbmc.atac, min.cutoff = "q0")

In [None]:
# Dimensionality reduction of the ATAC data by SVD; the result is consumed
# below as the "lsi" reduction.
pbmc.atac <- RunSVD(pbmc.atac)

# UMAP on LSI components 2-30, stored under its own name and key so it is
# distinguishable from an RNA-derived UMAP.
pbmc.atac <- RunUMAP(
    pbmc.atac,
    reduction = "lsi",
    dims = 2:30,
    reduction.name = "umap.atac",
    reduction.key = "atacUMAP_"
)

In [None]:
# Preview the first rows of the RNA object.
head(pbmc.rna)

In [None]:
# Two UMAP panels for a side-by-side view, both without legends:
#   p1 - RNA cells coloured and labelled by the "cluster" metadata column
#   p2 - ATAC cells grouped by "orig.ident", unlabelled
p1 <- DimPlot(
    pbmc.rna,
    group.by = "cluster",
    label = TRUE
) +
    NoLegend() +
    ggtitle("RNA")

p2 <- DimPlot(
    pbmc.atac,
    group.by = "orig.ident",
    label = FALSE
) +
    NoLegend() +
    ggtitle("ATAC")

In [None]:
# Display the RNA and ATAC panels together.
p1 + p2

In [None]:
# Combine both panels, then apply the shared axis labels and axis-title size
# step by step; `&` is used so each modifier reaches every panel.
plot <- p1 + p2
plot <- plot & xlab("UMAP 1")
plot <- plot & ylab("UMAP 2")
plot <- plot & theme(axis.title = element_text(size = 18))

In [None]:
# Write the combined RNA/ATAC figure to disk as a 12 x 7 JPEG (quality 50).
ggsave(
    filename = "/SGRNJ06/randd/USER/cjj/celedev/atac/20231108scrna_atac/atacseq_integration_vignette.jpg",
    plot = plot,
    width = 12,
    height = 7,
    quality = 50
)

In [None]:
# Identifying anchors between scRNA-seq and scATAC-seq datasets

In [None]:
# Derive a gene-activity matrix from the ATAC data, limited to the variable
# features of the RNA reference.
gene.activities <- GeneActivity(
    pbmc.atac,
    features = VariableFeatures(pbmc.rna)
)

# Store the activities in their own assay and make it the default, so the
# normalisation and scaling below operate on it.
pbmc.atac[["ACTIVITY"]] <- CreateAssayObject(counts = gene.activities)
DefaultAssay(pbmc.atac) <- "ACTIVITY"

# Normalise, then scale every feature of the activity assay.
pbmc.atac <- NormalizeData(pbmc.atac)
pbmc.atac <- ScaleData(
    pbmc.atac,
    features = rownames(pbmc.atac)
)

In [None]:
# Find anchors linking the RNA reference ("RNA" assay) to the ATAC query
# ("ACTIVITY" assay) using CCA over the reference's variable features.
transfer.anchors <- FindTransferAnchors(
    reference = pbmc.rna,
    query = pbmc.atac,
    features = VariableFeatures(object = pbmc.rna),
    reference.assay = "RNA",
    query.assay = "ACTIVITY",
    reduction = "cca"
)

In [None]:
# Label transfer: predict a label for each scATAC-seq cell from the
# reference's "cluster" column, weighting anchors in the ATAC "lsi"
# reduction on components 2-30.
celltype.predictions <- TransferData(
    anchorset = transfer.anchors,
    refdata = pbmc.rna$cluster,
    weight.reduction = pbmc.atac[["lsi"]],
    dims = 2:30
)

In [None]:
# Attach the transferred predictions to the ATAC object's metadata.
pbmc.atac <- AddMetaData(pbmc.atac, metadata = celltype.predictions)

In [None]:
# Preview the ATAC object after the predictions were added.
head(pbmc.atac)

In [None]:
# Print the full prediction table.
celltype.predictions

In [None]:
# pbmc.atac$annotation_correct <- pbmc.atac$predicted.id == pbmc.atac$celltype.predictions

In [None]:
# ATAC embedding coloured by the transferred label ("predicted.id"), with
# group labels drawn at size 5.
p1 <- DimPlot(
    pbmc.atac,
    group.by = "predicted.id",
    label = TRUE,
    label.size = 5
) +
    ggtitle("Predicted annotation")

In [None]:
# Display the predicted-annotation plot.
p1

In [None]:
# ATAC embedding grouped by "orig.ident", without labels or legend.
p2 <- DimPlot(
    pbmc.atac,
    group.by = "orig.ident",
    label = FALSE
) +
    NoLegend() +
    ggtitle("ATAC")

In [None]:
# Display the ATAC plot.
p2

In [None]:
# Save the predicted-annotation plot as a 24 x 14 PDF.
ggsave(
    filename = "/SGRNJ06/randd/USER/cjj/celedev/atac/20231108scrna_atac/predicted_celltype.pdf",
    plot = p1,
    width = 24,
    height = 14
)

In [None]:
# Save the ATAC plot (p2) as a 24 x 14 PDF. One write is sufficient:
# repeating the identical call only overwrites the same file.
ggsave(filename = "/SGRNJ06/randd/USER/cjj/celedev/atac/20231108scrna_atac/atac.pdf", height = 14, width = 24, plot = p2)

In [None]:
# ATAC embedding coloured by the "seurat_annotations" metadata column.
# NOTE(review): nothing visible in this notebook creates a
# "seurat_annotations" column on pbmc.atac — confirm it exists in the
# loaded metadata before relying on this plot.
p2 <- DimPlot(
    pbmc.atac,
    group.by = "seurat_annotations",
    label = TRUE
) +
    NoLegend() +
    ggtitle("Ground-truth annotation")

In [None]:
# Show the predicted and ground-truth plots side by side.
p1 | p2

In [None]:
# Cross-tabulate annotation (rows) against predicted label (columns).
# NOTE(review): depends on a "seurat_annotations" column that is not created
# anywhere visible in this notebook — confirm.
predictions <- table(
    pbmc.atac$seurat_annotations,
    pbmc.atac$predicted.id
)

In [None]:
# Turn counts into row-wise fractions, i.e. normalize for the number of
# cells in each annotated cell type.
predictions <- prop.table(predictions, margin = 1)

In [None]:
# Long format (Var1, Var2, Freq) for plotting.
predictions <- as.data.frame(predictions)

In [None]:
# Heatmap of the fraction of cells of each annotated type (x) assigned to
# each predicted label (y).
# theme_cowplot() is namespace-qualified because cowplot is not attached by
# any library() call in this notebook.
p1 <- ggplot(predictions, aes(Var1, Var2, fill = Freq)) +
    geom_tile() +
    scale_fill_gradient(name = "Fraction of cells", low = "#ffffc8", high = "#7d0025") +
    xlab("Cell type annotation (RNA)") +
    ylab("Predicted cell type label (ATAC)") +
    cowplot::theme_cowplot() +
    # Rotate the x tick labels so long cell-type names do not overlap.
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

In [None]:
# Number of cells whose predicted label equals the annotation; comparisons
# that evaluate to NA are not counted.
correct <- sum(pbmc.atac$seurat_annotations == pbmc.atac$predicted.id, na.rm = TRUE)

In [None]:
# Number of cells whose predicted label differs from the annotation; NA
# comparisons are likewise not counted.
incorrect <- sum(pbmc.atac$seurat_annotations != pbmc.atac$predicted.id, na.rm = TRUE)

In [None]:
# Collect, per ATAC cell, the maximum prediction score and whether the
# transferred label matches the annotation.
# The "annotation_correct" flag has to exist as a metadata column before
# FetchData() can return it, and no active cell above creates it, so it is
# computed here. Fail early with a clear message if there is no annotation
# column to compare against.
if (!"seurat_annotations" %in% colnames(pbmc.atac[[]])) {
    stop(
        "pbmc.atac has no 'seurat_annotations' metadata column; ",
        "cannot compute 'annotation_correct'.",
        call. = FALSE
    )
}
pbmc.atac$annotation_correct <- pbmc.atac$predicted.id == pbmc.atac$seurat_annotations

data <- FetchData(pbmc.atac, vars = c("prediction.score.max", "annotation_correct"))

In [None]:
# Density of the maximum prediction score, split by whether the annotation
# was correct; the legend labels carry the cell counts from `incorrect` and
# `correct`.
# theme_cowplot() is namespace-qualified because cowplot is not attached by
# any library() call in this notebook.
p2 <- ggplot(data, aes(prediction.score.max, fill = annotation_correct, colour = annotation_correct)) +
    geom_density(alpha = 0.5) +
    cowplot::theme_cowplot() +
    # Fill and colour scales share the same name and labels so ggplot merges
    # them into a single legend.
    scale_fill_discrete(
        name = "Annotation Correct",
        labels = c(paste0("FALSE (n = ", incorrect, ")"), paste0("TRUE (n = ", correct, ")"))
    ) +
    scale_color_discrete(
        name = "Annotation Correct",
        labels = c(paste0("FALSE (n = ", incorrect, ")"), paste0("TRUE (n = ", correct, ")"))
    ) +
    xlab("Prediction Score")

In [None]:
# Display the heatmap and the prediction-score density together.
p1 + p2

In [None]:
# Co-embedding scRNA-seq and scATAC-seq datasets

In [None]:
# Imputation is limited to the variable genes of the scRNA-seq reference;
# the full transcriptome could be imputed instead if desired.
genes.use <- VariableFeatures(pbmc.rna)

# Input: the reference's RNA "data" slot, restricted to those genes.
refdata <- GetAssayData(
    pbmc.rna,
    assay = "RNA",
    slot = "data"
)[genes.use, ]

# Output: an imputed scRNA-seq matrix for each of the ATAC cells, obtained
# through the same anchors and LSI weighting (components 2-30) used for
# label transfer.
imputation <- TransferData(
    anchorset = transfer.anchors,
    refdata = refdata,
    weight.reduction = pbmc.atac[["lsi"]],
    dims = 2:30
)

In [None]:
# Store the imputed expression on the ATAC object under the name "RNA".
pbmc.atac[["RNA"]] <- imputation

In [None]:
# Co-embedding: merge the RNA reference with the ATAC object, then centre
# (without scaling) the selected genes, run PCA on them and compute a UMAP
# from the first 30 components, so both datasets can be visualised together.
coembed <- merge(x = pbmc.rna, y = pbmc.atac) %>%
    ScaleData(features = genes.use, do.scale = FALSE) %>%
    RunPCA(features = genes.use, verbose = FALSE) %>%
    RunUMAP(dims = 1:30)

# One panel per grouping variable.
# NOTE(review): "seurat_annotations" is not created anywhere visible in this
# notebook — confirm the column exists in the merged object.
DimPlot(
    coembed,
    group.by = c("orig.ident", "seurat_annotations")
)