In [None]:
library(Seurat)
library(Signac)
library(GenomeInfoDb)
library(EnsDb.Mmusculus.v79)
library(ggplot2)
library(ggpubr)
library(glue)
library(scDblFinder)
library(RColorBrewer)
library(dplyr)
library(ggridges)
library(CopyscAT)
library(BSgenome.Mmusculus.UCSC.mm10)
library(harmony)
# Fix the random number generator seed so stochastic steps are repeatable across runs.
set.seed(123)


# The goals of this workflow are:
## 1. Subset the ATAC data to only the cells that are also present in the RNA (GEX) data (cDC1 only), so that the DNA accessibility data can be used to inform the RNA data, and annotate the cell types in the ATAC data using the RNA data as a reference (label transfer).
## 2. Normalize (TF-IDF), reduce dimensions (LSI/SVD), and find clusters in the atac_subGex_obj object.

# <span style="color:green"> Part 1-a: Subset ATAC object by common cells from GEX </span>

## merge atac samples

In [None]:
# Load the four QC-filtered ATAC objects (two WT and two KO replicates).
wt1 <- readRDS(file = "WT1_atac_QCfiltered.RDS")
wt2 <- readRDS(file = "WT2_atac_QCfiltered.RDS")
ko1 <- readRDS(file = "KO1_atac_QCfiltered.RDS")
ko2 <- readRDS(file = "KO2_atac_QCfiltered.RDS")

In [None]:
# Show the dimensions of each per-sample object before merging.
dim(wt1)
dim(wt2)
dim(ko1)
dim(ko2)

In [None]:
# Record the sample of origin on every cell so it survives the merge below.
wt1[["origin"]] <- "WT1"
wt2[["origin"]] <- "WT2"
ko1[["origin"]] <- "KO1"
ko2[["origin"]] <- "KO2"

In [None]:
# Record the experimental condition (WT or KO) on every cell.
wt1[["experimental_groups"]] <- "WT"
wt2[["experimental_groups"]] <- "WT"
ko1[["experimental_groups"]] <- "KO"
ko2[["experimental_groups"]] <- "KO"

In [None]:
# Merge the four samples into one object. Each cell name is prefixed with its
# sample ID ("wt1_", "wt2_", "ko1_", "ko2_"), in the same order as the inputs.
obj <- merge(
  x = wt1,
  y = list(wt2, ko1, ko2),
  add.cell.ids = c("wt1", "wt2", "ko1", "ko2")
)

In [None]:
# Dimensions of the merged ATAC object.
dim(obj)

In [None]:
# Count the entries of the merged object per sample of origin.
table(obj$origin)

In [None]:
# Preview the first 3 entries of the merged object.
head(obj, 3)

## read in cDC1-only GEX object

In [None]:
# Load the clustered, cDC1-only gene expression (GEX) object.
seur <- readRDS(file = "gex/GEX_cDC1_clustered.RDS")

In [None]:
# Dimensions of the GEX object.
dim(seur)

## Subset ATAC object based on shared cells in cDC1-only GEX object

In [None]:
# Preview the first 3 entries of the GEX object to inspect its cell naming.
head(seur, 3)

### The GEX object uses a different cell ID (barcode) format: each cell ID ends with a sample suffix such as ".KO1_matrix" or ".KO2_matrix".
### So create a new column in the GEX object whose values match the cell IDs used in the ATAC object.

In [None]:
# Start from the GEX cell names; they are rewritten into the ATAC naming
# scheme in the next step.
atac_cellID <- colnames(seur)

head(atac_cellID)

In [None]:

# Translate GEX cell names into the ATAC naming scheme:
#   "<barcode>.KO1_matrix" -> "ko1_<barcode>"   (likewise KO2, WT1, WT2)
# Names that do not end in one of these four suffixes are kept unchanged.
#
# sub() is vectorised, so the whole vector is rewritten in a single call and
# the result is always a character vector (sapply() would return a list for
# empty input).
suffix_pattern <- "^(.*)\\.(KO1|KO2|WT1|WT2)_matrix$"

# Group 1 is the barcode and group 2 the sample name. With perl = TRUE,
# "\\L...\\E" lower-cases the sample name only (KO1 -> ko1).
atac_cellID <- sub(suffix_pattern, "\\L\\2\\E_\\1", atac_cellID, perl = TRUE)

atac_cellID <- unname(atac_cellID)
head(atac_cellID)

In [None]:
# Store the translated IDs as a metadata column. The vector is unnamed, so it
# is assigned in the same order as the cell names it was derived from.
seur[["atac_cellID"]] <- atac_cellID

In [None]:
# Persist the GEX object with the new atac_cellID column.
# NOTE: this writes to the same path the object was read from, so the input
# file is overwritten.
saveRDS(object = seur, file = "gex/GEX_cDC1_clustered.RDS")

### Identify shared cells

In [None]:
# Cell IDs found both in the GEX object (translated IDs) and in the merged
# ATAC object, followed by how many there are.
shared_cells <- intersect(seur$atac_cellID, colnames(obj))
length(shared_cells)

In [None]:
# Subset atac based on shared cells
# (keeps only the ATAC cells that also exist in the cDC1-only GEX object)
atac_subGex_obj <- subset(obj, cells = shared_cells)
dim(atac_subGex_obj)


In [None]:
# Print a summary of the subsetted ATAC object.
atac_subGex_obj

In [None]:
# Show which assay is the default; RunSVD later in this notebook is called
# with assay = "peaks".
DefaultAssay(atac_subGex_obj)

# <span style="color:green"> Part 1-b: Annotate the cells in the atac_subGex object by transferring the cell type labels from the GEX object </span>

In [None]:
# Pull out the per-cell metadata table of the GEX object and preview it.
gex_meta <- seur@meta.data
head(gex_meta, 3)

### extract cell type labels from seur

In [None]:

# Lookup table: translated ATAC cell ID -> GEX cell type annotation.
celltype_labels <- data.frame(
  cellID = seur$atac_cellID,
  celltype_annotation = seur$celltype_annotation
)


In [None]:
# Key the lookup table by ATAC cell ID so it can be matched against the cell
# names of the ATAC object.
rownames(celltype_labels) <- celltype_labels$cellID

### Subset the celltype_labels to the shared cells

In [None]:
# Keep only the label rows whose cell ID is among the shared cells.
celltype_labels <- celltype_labels[celltype_labels$cellID %in% shared_cells, ]
dim(celltype_labels)

In [None]:
# Cell names of the ATAC subset; used below as the target order for the labels.
atacCellID <- Cells(atac_subGex_obj)
length(atacCellID)


### Make sure the labels are present in the atac_subGex_obj and in the right order

In [None]:
# First check: every label row corresponds to a cell in the ATAC subset.
# Second check: the label rows are also in exactly the same order as the cells.
all(rownames(celltype_labels) %in% colnames(atac_subGex_obj))
identical(rownames(celltype_labels), colnames(atac_subGex_obj))


In [None]:
# TRUE here would mean a cell ID appears more than once in the label table.
any(duplicated(rownames(celltype_labels)))


In [None]:
# Reorder the label table to follow the cell order of the ATAC subset.
# A cell with no matching label row would produce an all-NA row here.
celltype_labels <- celltype_labels[match(atacCellID, rownames(celltype_labels)), ]


In [None]:
# Preview the reordered label table.
head(celltype_labels)

In [None]:
# Re-check after reordering: should now be TRUE if every ATAC cell has a label.
identical(rownames(celltype_labels), colnames(atac_subGex_obj))


### Transfer label

In [None]:
# Build a vector of cell type labels named by ATAC cell ID, then store it as
# the "celltype_annotation" metadata column of the ATAC subset.
celltype_annotations <- setNames(
  celltype_labels$celltype_annotation,
  rownames(celltype_labels)
)

atac_subGex_obj <- AddMetaData(
  object = atac_subGex_obj,
  metadata = celltype_annotations,
  col.name = "celltype_annotation"
)

In [None]:
# Preview the ATAC subset to confirm the celltype_annotation column was added.
head(atac_subGex_obj, 3)

# <span style="color:green"> Part 2: Normalize, correct batch effects, and find clusters in the atacSubGex obj </span>

In [None]:
# Step 1: TF-IDF normalisation.
atac_subGex_obj <- RunTFIDF(atac_subGex_obj)

# Step 2: feature selection; the 'q0' cutoff is the 0th percentile.
atac_subGex_obj <- FindTopFeatures(atac_subGex_obj, min.cutoff = "q0")

# Step 3: SVD on the "peaks" assay, stored as the "lsi" reduction.
atac_subGex_obj <- RunSVD(
  atac_subGex_obj,
  assay = "peaks",
  reduction.key = "LSI_",
  reduction.name = "lsi"
)


In [None]:
# Set the notebook plot size, then draw an elbow plot of the first 25
# components of the "lsi" reduction to help choose how many to keep.
options(repr.plot.width=7, repr.plot.height=7)

ElbowPlot(atac_subGex_obj, ndims = 25, reduction = "lsi")

In [None]:
# Chose LSI dimensions 2:7 based on the elbow plot above

In [None]:
# Compute a UMAP embedding from LSI components 2 to 7.
atac_subGex_obj <- RunUMAP(atac_subGex_obj, dims = 2:7, reduction = 'lsi', verbose = FALSE)

In [None]:
# Build the neighbour graph on LSI components 2 to 7.
atac_subGex_obj <- FindNeighbors(
  object = atac_subGex_obj,
  reduction = "lsi",
  dims = 2:7,
  verbose = FALSE
)

# Cluster the graph with algorithm 3 at a low resolution (0.05).
atac_subGex_obj <- FindClusters(
  atac_subGex_obj,
  verbose = FALSE,
  algorithm = 3,
  resolution = 0.05
)

# Recompute the UMAP with the same reduction and dimensions as the earlier
# RunUMAP cell.
atac_subGex_obj <- RunUMAP(
  atac_subGex_obj,
  reduction = "lsi",
  dims = 2:7,
  verbose = FALSE
)

In [None]:
# Number of cells per identity class (the clusters found above).
table(Idents(atac_subGex_obj))

In [None]:
# Number of cells per transferred cell type label.
table(atac_subGex_obj$celltype_annotation)

In [None]:
# Set the notebook plot size for the combined figure.
options(repr.plot.width = 12, repr.plot.height = 12)

# Three views of the same embedding, coloured by:
#   one   - current identities (clusters), with labels drawn on the plot
#   two   - sample of origin
#   three - transferred cell type annotation
# NOTE(review): the colour vectors are hard-coded (2, 4 and 3 colours); they
# presumably need to match the number of groups in each plot. Confirm if the
# clustering resolution or annotations change.
one <- DimPlot(atac_subGex_obj, label = TRUE, cols = c("#FF5733", "#C70039"))
two <- DimPlot(
  atac_subGex_obj,
  group.by = "origin",
  cols = c("#FF1493", "#C71585", "#A9A9A9", "#808080")
)
three <- DimPlot(
  atac_subGex_obj,
  group.by = "celltype_annotation",
  cols = c("#C70039", "#FFC300", "#FF5733")
)

# Arrange the three plots in one figure.
ggarrange(one, two, three)

In [None]:
# Save the annotated and clustered ATAC subset.
saveRDS(object = atac_subGex_obj, file = "atac_subGex_clustered_obj.RDS")

In [None]:
# Number of cells per sample of origin remaining in the ATAC subset.
table(atac_subGex_obj$origin)