In [None]:
#Load libraries
library(Signac)
library(Seurat)
library(GenomicRanges)
library(future)
library(EnsDb.Mmusculus.v79)
library(BSgenome.Mmusculus.UCSC.mm10)
#

#Load peak data as GRanges
sampleNames <- c("ctrl1", "ctrl2", "ko1", "ko2")
sample_loc <- "new_h5/"

# One GRanges object per sample, built from that sample's atac_peaks.bed
# (columns are named chr/start/end so makeGRangesFromDataFrame can find them).
peaks <- lapply(sampleNames, function(x) {
  makeGRangesFromDataFrame(read.table(
    file = paste0(sample_loc, x, "/atac_peaks.bed"),
    col.names = c("chr", "start", "end")
  ))
})

## Create a unified set of peaks to quantify in each dataset
# Concatenate every per-sample peak set (however many samples are listed in
# sampleNames, rather than indexing exactly four) and merge overlapping ranges.
combined.peaks <- reduce(x = do.call(c, peaks))



In [None]:
# Inspect the width distribution of the merged peaks; a width-based filter
# would go here if the summary showed problematic peaks.
peakwidths <- width(x = combined.peaks)
summary(object = peakwidths) # Not for this dataset

#Create fragment objects
# Per-sample cell barcodes: column V1 of each sample's filtered barcodes file,
# stored in a list keyed by sample name.
md <- setNames(
  lapply(sampleNames, function(s) {
    barcode_file <- paste0(sample_loc, s, "/filtered_feature_bc_matrix/barcodes.tsv.gz")
    read.table(barcode_file)[["V1"]]
  }),
  sampleNames
)

# One fragment object per sample, restricted to that sample's barcodes.
frags <- setNames(
  lapply(sampleNames, function(s) {
    CreateFragmentObject(
      path = paste0(sample_loc, s, "/atac_fragments.tsv.gz"),
      cells = md[[s]]
    )
  }),
  sampleNames
)

#Quantify peaks in each dataset
# Count fragments over the shared peak set for every sample, so all samples
# end up with the same feature space.
peaks.counts <- setNames(
  lapply(names(frags), function(frag) {
    FeatureMatrix(
      fragments = frags[[frag]],
      features = combined.peaks,
      cells = md[[frag]]
    )
  }),
  sampleNames
)


In [None]:
#Get annotation for mm10
# Pull gene annotations out of the EnsDb.Mmusculus.v79 database, then switch
# the sequence naming to UCSC style.
annotation <- GetGRangesFromEnsDb(EnsDb.Mmusculus.v79)
seqlevelsStyle(annotation) <- "UCSC"


In [None]:

# Set up the sample loop: load the RNA data, attach the ATAC assay, and store
# one Seurat object per sample.
samples <- list()
for (i in sampleNames) {
  # Build the path instead of calling setwd(): sample_loc is relative, so
  # changing directory inside the loop breaks the path on the second sample
  # and leaves the session in a different working directory.
  counts <- Read10X_h5(
    paste0(sample_loc, i, "/filtered_feature_bc_matrix.h5")
  ) #count data

  # create a Seurat object containing the RNA data
  dat <- CreateSeuratObject(
    counts = counts$`Gene Expression`,
    assay = "RNA"
  )

  # Record the sample of origin so cells can be grouped by dataset after merging.
  dat$dataset <- i

  # create ATAC assay (counts over the shared peak set) and add it to the object
  dat[["ATAC"]] <- CreateChromatinAssay(
    counts = peaks.counts[[i]],
    sep = c(":", "-"),
    fragments = frags[[i]],
    annotation = annotation
  )
  samples[[i]] <- dat
}
#Merge all in one seurat object
# The first sample is the merge target and the remaining ones are passed as a
# list; add.cell.ids supplies one label per sample, in sampleNames order.
combined <- merge(
  samples[[1]],
  y = samples[-1],
  add.cell.ids = c("Ctrl1", "Ctrl2", "KO1", "KO2")
)

#ATAC QC
DefaultAssay(combined) <- "ATAC"

# Compute the nucleosome signal first, then the TSS enrichment, on the ATAC assay.
combined <- TSSEnrichment(NucleosomeSignal(combined))

# Violin plots (no points) of the depth and QC metrics used for filtering.
VlnPlot(
  combined,
  features = c("nCount_RNA", "nCount_ATAC", "TSS.enrichment", "nucleosome_signal"),
  pt.size = 0,
  ncol = 4
)

# The thresholds below are arbitrary; a data-driven choice would be preferable.
# Keep cells within the count windows for both modalities that also pass the
# nucleosome-signal and TSS-enrichment cutoffs.
combined <- subset(
  x = combined,
  subset = nCount_ATAC > 1000 & nCount_ATAC < 50000 &
    nCount_RNA > 1000 & nCount_RNA < 25000 &
    nucleosome_signal < 2 &
    TSS.enrichment > 1
)

#Standard multiome normalization/dr analysis

# RNA: SCTransform normalization followed by PCA.
DefaultAssay(combined) <- "RNA"
combined <- SCTransform(combined)
combined <- RunPCA(combined)

# ATAC: SCTransform leaves the default assay on its own output, so switch to
# ATAC explicitly; otherwise TF-IDF / top features / SVD would run on the
# expression assay instead of the peak counts.
DefaultAssay(combined) <- "ATAC"
combined <- RunTFIDF(combined)
combined <- FindTopFeatures(combined, min.cutoff = 20)
combined <- RunSVD(combined)
# NOTE(review): the multimodal step further down uses LSI dims 2:40 (skipping
# the first component); confirm whether component 1 should be dropped here too.
combined <- RunUMAP(combined, dims = 1:30, reduction = 'lsi')

#Cell type assignment from reference
library(scRNAseq)
library(SeuratDisk)
library(zellkonverter)

# Read the reference dataset from the .h5ad file.
reference1 <- readH5AD("/data/user/apapada1/Downloads/kidney.h5ad")

# Per-cell metadata, taken through the colData() accessor rather than the raw
# slot, and reused when building the Seurat object.
meta <- as.data.frame(colData(reference1))

# Seurat version of the reference, built from the "X" assay.
# NOTE(review): SCTransform expects raw counts -- confirm "X" holds counts.
reference <- CreateSeuratObject(counts = assay(reference1, "X"), meta.data = meta)
reference <- SCTransform(reference)
reference <- RunPCA(reference)
reference <- RunUMAP(reference, dims = 1:30)

# The query plot groups by `dataset`; if that column is absent, derive it from
# the sample prefix that merge() put in front of each cell name.
if (!"dataset" %in% colnames(combined[[]])) {
  combined$dataset <- sub("_.*$", "", colnames(combined))
}

# NOTE(review): this plots the raw LSI components; confirm whether the UMAP
# computed from LSI was intended instead.
DimPlot(combined, group.by = 'dataset', pt.size = 0.1, reduction = "lsi")
DimPlot(reference, group.by = 'cell_type', pt.size = 0.1)

#Anchor transfer and predictions
# Anchors are found between the Seurat reference (not the SingleCellExperiment
# it was built from) and the query. Both objects were normalized with
# SCTransform, so the SCT assays and the SCT normalization method are named
# explicitly rather than relying on whichever assay is currently the default.
transfer_anchors <- FindTransferAnchors(
  reference = reference,
  query = combined,
  normalization.method = "SCT",
  reference.assay = "SCT",
  query.assay = "SCT",
  recompute.residuals = FALSE,
  dims = 1:30
)

# Transfer the reference labels onto the query cells, weighting anchors in the
# query's RNA PCA space.
# NOTE(review): labels come from the `type` column here, while the reference
# plot groups by `cell_type` -- confirm which column is intended.
predictions <- TransferData(
  anchorset = transfer_anchors,
  refdata = reference1$type,
  weight.reduction = combined[['pca']],
  dims = 1:30
)

# Attach the prediction table to the query's cell metadata.
combined <- AddMetaData(
  object = combined,
  metadata = predictions
)# You can filter based on prediction score here.

#Multimodal umap

# Joint neighbor graph from the RNA PCA (dims 1:30) and the ATAC LSI
# (dims 2:40, i.e. without the first LSI component).
combined <- FindMultiModalNeighbors(
  combined,
  reduction.list = list("pca", "lsi"),
  dims.list = list(1:30, 2:40),
  modality.weight.name = "RNA.weight",
  verbose = TRUE
)

# UMAP computed from the "weighted.nn" neighbor object.
combined <- RunUMAP(
  combined,
  nn.name = "weighted.nn",
  assay = "RNA",
  verbose = TRUE
)

#Keep main chromosome peaks
library(BSgenome.Mmusculus.UCSC.mm10)
main.chroms <- standardChromosomes(BSgenome.Mmusculus.UCSC.mm10)

# Read the peak ranges from the ATAC assay itself: granges() on the whole
# object would use the default assay, which need not be ATAC at this point.
keep.peaks <- as.logical(seqnames(granges(combined[["ATAC"]])) %in% main.chroms)

# Drop every peak that is not on a standard chromosome.
combined[["ATAC"]] <- subset(
  combined[["ATAC"]],
  features = rownames(combined[["ATAC"]])[keep.peaks]
)

#Coverage plots

# LinkPeaks needs per-peak sequence statistics (e.g. GC content) in the ATAC
# feature metadata, so compute them against the mm10 genome first.
combined <- RegionStats(
  object = combined,
  genome = BSgenome.Mmusculus.UCSC.mm10,
  assay = "ATAC"
)

# Link peaks to the expression of the two genes of interest.
combined <- LinkPeaks(
  object = combined,
  peak.assay = "ATAC",
  expression.assay = "SCT",
  genes.use = c("Nphs1", "Wt1")
)

# Group the coverage tracks by the transferred cell-type label.
Idents(combined) <- "predicted.id"

# The ATAC assay is named explicitly so the plots do not depend on which assay
# is currently the default.
p1 <- CoveragePlot(
  object = combined,
  region = "Nphs1",
  features = "Nphs1",
  assay = "ATAC",
  expression.assay = "SCT",
  extend.upstream = 500,
  extend.downstream = 10000
)
p2 <- CoveragePlot(
  object = combined,
  region = "Wt1",
  features = "Wt1",
  assay = "ATAC",
  expression.assay = "SCT",
  extend.upstream = 500,
  extend.downstream = 10000
)
