## Creates binarized Seurat objects, integrates conditions and annotates genes by nearby peaks

In [None]:
# Input info
# Output directory; the input "allSamples.rds" is also read from here.
outdir <- "/data2/mito_lineage/output/annotation/data/CHIP_april08_2021/MTblacklist/"
# Comma-separated sample labels / sample ids (split into vectors further down).
sample_names <- "Control,Flt3l,Input"
samples <- "Control,Flt3l,Input"
# First LSI component used as the lower bound of `dims` downstream.
lsi_start_comp <- 2

# Parameters
nTop <- 25000
# String flag; converted to a logical in the next cell.
to.filt.cells <- "FALSE"

## QC parameters
min_peak_region_fragments <- 1000
max_peak_region_fragments <- 10000
min_pct_reads_in_peaks <- 20
max_blacklist_ratio <- 0.05
max_nucleosome_signal <- 15
min_TSS_enrichment <- 0.2

# Number of workers handed to the future plan.
cores <- 24

In [None]:
# Normalise the `to.filt.cells` flag to a single logical value.
# The flag may arrive as a string ("T", "TRUE", in any letter case, with or
# without surrounding whitespace) or already as a logical. Anything else,
# including NA or NULL, disables filtering instead of raising an error.
to.filt.cells <- isTRUE(
    toupper(trimws(as.character(to.filt.cells)[1])) %in% c("T", "TRUE")
)


In [None]:
library(repr)
# Default size of inline figures in the notebook.
options(
    repr.plot.width = 12,
    repr.plot.height = 12
)

In [None]:
# Turn the comma-separated parameter strings into character vectors.
# For `samples` only the first element of the input is split.
samples <- strsplit(samples, split = ",")[[1]]
sample_names <- unlist(strsplit(sample_names, split = ","))

# Echo the parsed sample ids.
samples

In [None]:
library(GenomicRanges)
library(Seurat)
library(Signac)
library(GenomeInfoDb)
library(EnsDb.Hsapiens.v75)
library(ggplot2)
library(patchwork)
set.seed(1234)
library(data.table)
library(magrittr)
library(cowplot)
library(metap)
library(dplyr)
library("patchwork")
library(future)

# Parallel backend for the future framework.
# The "multiprocess" strategy is deprecated (defunct in recent releases of
# the future package). Reproduce what it did explicitly: forked workers where
# forking is supported, otherwise background R sessions.
if (future::supportsMulticore()) {
    plan("multicore", workers = cores)
} else {
    plan("multisession", workers = cores)
}
# Upper bound on the size of globals exported to workers (8000 MiB).
options(future.globals.maxSize = 8000 * 1024^2)
#options(future.globals.maxSize = 50000 * 1024^2) # for 50 Gb RAM

In [None]:
# Load the saved samples object from the output directory
# (presumably a list with one Seurat object per sample; it is indexed and
# iterated as a list below).
allSE <- readRDS(file = file.path(outdir, "allSamples.rds"))

## Filter cells by the QC parameters (only when `to.filt.cells` is TRUE)

In [None]:
#' Keep only the cells of an object that pass every QC threshold.
#'
#' The object's metadata must provide `peak_region_fragments`,
#' `pct_reads_in_peaks`, `blacklist_ratio`, `nucleosome_signal` and
#' `TSS.enrichment`. All comparisons are strict.
#'
#' @param se Object to filter with `subset()`.
#' @param min_peak_region_fragments Lower bound on `peak_region_fragments`.
#' @param max_peak_region_fragments Upper bound on `peak_region_fragments`.
#' @param min_pct_reads_in_peaks Lower bound on `pct_reads_in_peaks`.
#' @param max_blacklist_ratio Upper bound on `blacklist_ratio`.
#' @param max_nucleosome_signal Upper bound on `nucleosome_signal`.
#' @param min_TSS_enrichment Lower bound on `TSS.enrichment`.
#' @return The subsetted object.
filtCells <- function(se, min_peak_region_fragments=10,
                      max_peak_region_fragments=15000,
                     min_pct_reads_in_peaks=15,
                     max_blacklist_ratio=0.05,
                     max_nucleosome_signal=4,
                     min_TSS_enrichment=2){
    subset(
        x = se,
        subset = peak_region_fragments > min_peak_region_fragments &
            peak_region_fragments < max_peak_region_fragments &
            pct_reads_in_peaks > min_pct_reads_in_peaks &
            blacklist_ratio < max_blacklist_ratio &
            nucleosome_signal < max_nucleosome_signal &
            TSS.enrichment > min_TSS_enrichment
    )
}

In [None]:
# Apply the QC thresholds to every sample when filtering is enabled.
# The filtered list replaces `allSE`, so that the rest of the notebook works
# on the filtered cells. Previously the result was stored in an unused
# variable `c`, and a stray undefined `se` was passed as an extra positional
# argument.
if (to.filt.cells) {
    allSE <- lapply(
        allSE, filtCells,
        min_peak_region_fragments = min_peak_region_fragments,
        max_peak_region_fragments = max_peak_region_fragments,
        min_pct_reads_in_peaks = min_pct_reads_in_peaks,
        max_blacklist_ratio = max_blacklist_ratio,
        max_nucleosome_signal = max_nucleosome_signal,
        min_TSS_enrichment = min_TSS_enrichment
    )
}

### Plot QC

# Merge:
- get umap and clusters

In [None]:
# Prefix every cell name with its sample identity: "<orig.ident>_<barcode>".
# The names are passed through `new.names`. Passing them positionally would
# fill `add.cell.id`, which prepends its value to the existing cell name and
# so yields "<orig.ident>_<barcode>_<barcode>".
for (i in seq_along(allSE)) {
    print(i)
    curr.SE <- allSE[[i]]
    allSE[[i]] <- RenameCells(
        curr.SE,
        new.names = paste(curr.SE$orig.ident, colnames(curr.SE), sep = "_")
    )
}

# Merge the first two samples, then fold in the remaining ones one at a time.
combined <- merge(
    x = allSE[[1]],
    y = allSE[[2]]
)

if (length(sample_names) > 2) {
    for (i in seq(3, length(sample_names))) {
        combined <- merge(x = combined, y = allSE[[i]])
    }
}

In [None]:
# Select top features on the merged object (min.cutoff = 20), then show the
# object summary.
combined <- FindTopFeatures(
    object = combined,
    min.cutoff = 20
)
combined

In [None]:
# Order the sample identities as given in `sample_names`, so plots follow it.
combined$orig.ident <- factor(x = combined$orig.ident, levels = sample_names)

# Violin plots of the count metrics, split by sample.
VlnPlot(
    combined,
    features = c(
        'nCount_ATAC',
        'peak_region_fragments',
        'passed_filters',
        'duplicate',
        'unmapped'
    ),
    split.by = "orig.ident",
    pt.size = 0.1,
    ncol = 3
)

In [None]:
# Binarize the counts, then run TF-IDF and SVD (the LSI steps).
combined <- BinarizeCounts(object = combined)
combined <- RunTFIDF(object = combined)
combined <- RunSVD(object = combined)

# UMAP on the LSI components from `lsi_start_comp` up to 50.
combined <- RunUMAP(
    object = combined,
    dims = lsi_start_comp:50,
    reduction = 'lsi'
)
DimPlot(object = combined, group.by = "proj", pt.size = 0.1)

In [None]:
# Depth-correlation plot for the merged object; shown inline.
pDepthCorr <- DepthCor(object = combined)
pDepthCorr

In [None]:
# Split the merged object back into one object per sample, in the order of
# `sample_names`, and show the resulting list.
allSE <- lapply(
    sample_names,
    function(sample_id) {
        subset(combined, subset = orig.ident == sample_id)
    }
)
allSE

### QC first

In [None]:
library(Rsamtools)

#' Attach gene annotations to an object and compute its per-cell QC metrics.
#'
#' Adds `nucleosome_signal` and `TSS.enrichment` (through `NucleosomeSignal()`
#' and `TSSEnrichment()`), plus the derived columns `pct_reads_in_peaks`,
#' `blacklist_ratio`, `high.tss` and `nucleosome_group`.
#'
#' @param se Object providing the `peak_region_fragments`, `passed_filters`
#'   and `blacklist_region_fragments` metadata columns.
#' @return `se` with the annotation set and the QC columns added.
qc <- function(se){
    # extract gene annotations from EnsDb
    annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v75)

    # switch the sequence names to UCSC style ("chr1", ...)
    seqlevelsStyle(annotations) <- 'UCSC'
    # NOTE(review): EnsDb.Hsapiens.v75 is presumably the GRCh37/hg19
    # annotation, yet the genome is labelled "hg38". Confirm which build the
    # data was mapped to. The label is left unchanged here.
    genome(annotations) <- "hg38"

    # Add the gene information to the object. It is set once: the unused and
    # expensive GeneActivity() call and the repeated assignment were dropped.
    Annotation(se) <- annotations

    # compute nucleosome signal score per cell
    se <- NucleosomeSignal(object = se)

    # compute TSS enrichment score per cell
    se <- TSSEnrichment(object = se, fast = FALSE)

    # add blacklist ratio and percentage of reads in peaks
    se$pct_reads_in_peaks <- se$peak_region_fragments / se$passed_filters * 100
    se$blacklist_ratio <- se$blacklist_region_fragments / se$peak_region_fragments
    # coarse groupings: cut-offs of 2 (TSS) and 4 (nucleosome signal)
    se$high.tss <- ifelse(se$TSS.enrichment > 2, 'High', 'Low')
    se$nucleosome_group <- ifelse(se$nucleosome_signal > 4, 'NS > 4', 'NS < 4')

    return(se)
}
#' Violin plots of the five per-cell QC metrics of one object.
#'
#' @param se Object with the QC metadata columns plotted below. Its first
#'   `orig.ident` value is used as the figure title.
#' @return The combined plot with a centred title.
vPlot <- function(se){
    qc_metrics <- c('pct_reads_in_peaks', 'peak_region_fragments',
                    'TSS.enrichment', 'blacklist_ratio', 'nucleosome_signal')
    violins <- VlnPlot(
        object = se,
        features = qc_metrics,
        pt.size = 0.1,
        ncol = 5
    )
    # `+` binds tighter than `&`: the title annotation is added first, then
    # the theme is applied across the figure.
    titled <- violins + plot_annotation(title = se$orig.ident[[1]])
    titled & theme(plot.title = element_text(hjust = 0.5, size=15))
}

# One QC violin figure per sample; shown inline.
# NOTE(review): these plots are drawn before qc() runs, so the plotted columns
# must already exist in the loaded objects. Confirm this.
qc_plots <- lapply(allSE, vPlot)
qc_plots

# Save all samples stacked in a single figure, passing the plot explicitly.
# Relying on ggsave()'s implicit last plot stored at most one sample.
# wrap_elements() keeps every sample's own title when the figures are stacked.
qc_panel <- wrap_plots(
    lapply(qc_plots, function(p) wrap_elements(full = p)),
    ncol = 1
)
qc_height <- 5 * length(qc_plots)
ggsave(file.path(outdir, "QC_02.png"), plot = qc_panel,
       width = 20, height = qc_height, limitsize = FALSE)
ggsave(file.path(outdir, "QC_02.pdf"), plot = qc_panel,
       width = 20, height = qc_height, limitsize = FALSE)

# Compute the QC metrics for every sample and show the updated list.
# The former ggsave() to "integrated.batch.png" was removed: it stored a QC
# plot under the name of the integration figure, which is written later.
allSE <- lapply(allSE, qc)
allSE


# Integrate

In [None]:
# find integration anchors
# `anchor.features` must be feature names, not an object: use the features of
# the first sample. The samples were split from the same merged object.
integration.anchors <- FindIntegrationAnchors(
  object.list = allSE,
  anchor.features = rownames(allSE[[1]]),
  reduction = "rlsi",
  dims = lsi_start_comp:30
)

# integrate LSI embeddings
# The merged object's "lsi" reduction is corrected into "integrated_lsi".
integrated <- IntegrateEmbeddings(
  anchorset = integration.anchors,
  reductions = combined[["lsi"]],
  new.reduction.name = "integrated_lsi",
  dims.to.integrate = 1:30
)



## Run UMAP and plot

In [None]:
# Embedding of the merged object, coloured by the "proj" metadata column.
p1 <- DimPlot(object = combined, group.by = "proj")

In [None]:
# create a new UMAP using the integrated embeddings
# The first component comes from `lsi_start_comp`, the same lower bound used
# for the merged UMAP and for anchor finding.
integrated <- RunUMAP(
  integrated,
  reduction = "integrated_lsi",
  dims = lsi_start_comp:30
)
p2 <- DimPlot(integrated, group.by = "proj")
# Pass the plot explicitly rather than relying on ggsave()'s last plot.
ggsave(file.path(outdir,"integrated.batch.png"), plot = p2, dpi=300)
p2

### Compare merged and integrated

In [None]:
# Side-by-side comparison of the merged and the integrated embeddings.
# The figure is kept in a variable so that ggsave() receives it explicitly
# instead of depending on whichever plot was created last.
pCompare <- (p1 + ggtitle("Merged")) | (p2 + ggtitle("Integrated"))
pCompare
ggsave(file.path(outdir,"integrated.merged.compare.png"), plot = pCompare, dpi=300)

## Correlation of LSI components and Depth

In [None]:
# Depth-correlation plot for the integrated LSI reduction; saved to disk and
# shown inline.
pDepthCorr <- DepthCor(object = integrated, reduction = 'integrated_lsi')
ggsave(
    filename = file.path(outdir, "integrated.depthCor.png"),
    plot = pDepthCorr,
    dpi = 300
)

pDepthCorr

## Cluster and plot

In [None]:
#integrated <- RunUMAP(object = integrated, reduction = 'integrated_lsi', dims = 2:30)
# Neighbour graph and clustering on the integrated LSI embedding.
# The first component comes from `lsi_start_comp`, the same lower bound used
# for the merged UMAP and for anchor finding.
integrated <- FindNeighbors(
  object = integrated,
  reduction = 'integrated_lsi',
  dims = lsi_start_comp:30
)
integrated <- FindClusters(object = integrated, verbose = FALSE, algorithm = 3)

# Labelled cluster plot, saved and shown inline.
pclust <- DimPlot(object = integrated, label = TRUE) + NoLegend()
ggsave(file.path(outdir, "integrated.lsi.clusters.png"), plot = pclust)
pclust

## Get gene activity results 

In [None]:
# Gene annotations taken from the EnsDb package.
annotations <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v75)

# Use UCSC-style sequence names and label the genome build.
# NOTE(review): EnsDb.Hsapiens.v75 is presumably the GRCh37/hg19 annotation,
# yet the genome is labelled "hg38". Confirm which build the data was mapped
# to.
seqlevelsStyle(x = annotations) <- "UCSC"
genome(x = annotations) <- "hg38"

# Attach the annotation to the integrated object.
Annotation(object = integrated) <- annotations

# Gene activity matrix of the integrated object.
gene.activities <- GeneActivity(object = integrated)


In [None]:
# Store the gene activity matrix as a new "RNA" assay.
integrated[["RNA"]] <- CreateAssayObject(counts = gene.activities)

# Log-normalise the new assay, scaling to the median of nCount_RNA.
integrated <- NormalizeData(
    integrated,
    assay = "RNA",
    normalization.method = "LogNormalize",
    scale.factor = median(integrated$nCount_RNA)
)

In [None]:
# Make the gene activity ("RNA") assay the default assay.
DefaultAssay(object = integrated) <- "RNA"

In [None]:
# Write the integrated object to the output directory.
saveRDS(
    object = integrated,
    file = file.path(outdir, "allSamples.integrated.rds")
)

In [None]:
# Record the R version and the loaded package versions for reproducibility.
sessionInfo()