In [None]:
library(BiocManager)
library(BSgenome.Hsapiens.UCSC.hg38)
library(ArchR)
library(ggplot2)
library(TFBSTools)
library(Seurat)
library(ggplot2)
library(dplyr)
library(harmony)
library(Signac)
library(BSgenome.Hsapiens.UCSC.hg38)
library(JASPAR2018)
library(edgeR)
library(sctransform)
# Load the hg38 gene and genome annotation objects and keep them under
# shorter aliases.
data("geneAnnoHg38", "genomeAnnoHg38")
geneAnno <- geneAnnoHg38
genomeAnno <- genomeAnnoHg38

# Set the number of threads ArchR uses.
addArchRThreads(threads = 24)


# Optional: copy every function defined in the ArchR namespace (including
# unexported internals) into the global environment, so that custom code in
# this notebook can call helpers such as `.createArrowGroup()` unqualified.
# This step may not be needed if you only want the CNV calls.
fn <- unclass(lsf.str(envir = asNamespace("ArchR"), all.names = TRUE))
# mget()/list2env() copy the function objects directly. The earlier
# eval(parse(text = ...)) loop silently skipped non-syntactic names (e.g.
# operators) and swallowed every other error in an empty handler.
invisible(
  list2env(mget(fn, envir = asNamespace("ArchR")), envir = globalenv())
)

In [None]:
# Load the ArchR project for which the 10 Mb CNV-ATAC matrix is computed.
# Replace the placeholder with the path to the project directory.
proj <- loadArchRProject(path = "ArchR project of interest")

In [None]:
# TCGA_Helper.R contains helper functions used when computing the CNV matrix.
source("TCGA_Helper.R")

# Custom variant of ArchR's CNV-matrix worker. For one Arrow file it counts
# fragment ends per genomic window, normalises the per-window coverage against
# windows of similar GC content, smooths the resulting log2 scores and writes
# them to the Arrow file under the group "Use_CNVMatrix_10MB_GCsmooth".
#
# Arguments:
#   i                Index into `ArrowFiles` selecting the file to process.
#   ArrowFiles       Character vector of Arrow file paths.
#   normByNeighbors  If TRUE, additionally subtract a rolling mean taken
#                    across cells ordered by their total counts.
#   cellNames        Cells to use; defaults to `.availableCells(ArrowFile)`.
#   allCells         Optional whitelist; `cellNames` is restricted to it.
#   windows          GRanges of sub-windows. The code reads the metadata
#                    columns name, wSeq, wStart, wEnd, GC, AT and N.
#   force            Forwarded to `.createArrowGroup()` / `.initializeMat()`.
#   tstart           Ignored: it is overwritten with the current time.
#   subThreads       Currently unused (windows are counted with threads = 1).
#   logFile          Forwarded to `.createArrowGroup()`.
#   windowSize,
#   stepSize         Window size and step used for the smoothing width and
#                    stored in the matrix parameters. When NULL they are looked
#                    up in the enclosing environment (the previous behaviour,
#                    where they were read as free variables).
#
# Returns the path of the processed Arrow file.
.addCNVMatrix<-function (i = NULL, ArrowFiles = NULL, normByNeighbors = FALSE, 
    cellNames = NULL, allCells = NULL, windows = NULL, force = FALSE, 
    tstart = NULL, subThreads = NULL, logFile = NULL,
    windowSize = NULL, stepSize = NULL) 
{
    # Resolve the window geometry up front so that a missing value fails here
    # rather than after the expensive per-chromosome counting below.
    enclos <- parent.env(environment())
    if (is.null(windowSize)) {
        windowSize <- get0("windowSize", envir = enclos, mode = "numeric")
    }
    if (is.null(stepSize)) {
        stepSize <- get0("stepSize", envir = enclos, mode = "numeric")
    }
    if (is.null(windowSize) || is.null(stepSize)) {
        stop("`windowSize` and `stepSize` must be supplied or defined in the ", 
            "calling environment before running .addCNVMatrix()", call. = FALSE)
    }
    ArrowFile <- ArrowFiles[i]
    o <- h5closeAll()
    o <- .createArrowGroup(ArrowFile = ArrowFile, group = "Use_CNVMatrix_10MB_GCsmooth", 
        force = force, logFile = logFile)
    # The caller-supplied tstart is discarded; timing restarts per Arrow file.
    tstart <- Sys.time()
    if (is.null(cellNames)) {
        cellNames <- .availableCells(ArrowFile)
    }
    if (!is.null(allCells)) {
        cellNames <- cellNames[cellNames %in% allCells]
    }
    uniqueChr <- as.character(unique(seqnames(windows)@values))
    # One SummarizedExperiment per chromosome, row-bound at the end.
    seWindows <- .safelapply(seq_along(uniqueChr), function(x) {
        o <- h5closeAll()
        chr <- uniqueChr[x]
        windowsx <- windows[BiocGenerics::which(seqnames(windows) == 
            chr)]
        rangesx <- ranges(windowsx)
        .messageDiffTime(sprintf("Counting Windows for Chromosome %s of %s!", 
            x, length(uniqueChr)), tstart)
        fragments <- .getFragsFromArrow(ArrowFile, chr = chr, 
            out = "IRanges", cellNames = cellNames)
        # Overlap the 1-bp fragment start positions with the sub-windows ...
        temp <- IRanges(start = start(fragments), width = 1)
        stopifnot(length(temp) == length(fragments))
        oleft <- findOverlaps(ranges(rangesx), temp)
        oleft <- DataFrame(queryHits = Rle(queryHits(oleft)), 
            subjectHits = subjectHits(oleft))
        # ... and likewise the 1-bp fragment end positions.
        temp <- IRanges(start = end(fragments), width = 1)
        stopifnot(length(temp) == length(fragments))
        oright <- findOverlaps(ranges(rangesx), temp)
        oright <- DataFrame(queryHits = Rle(queryHits(oright)), 
            subjectHits = subjectHits(oright))
        remove(temp)
        # Translate fragment indices into column (cell) indices via the RG tag.
        oleft$subjectHits <- as.integer(BiocGenerics::match(mcols(fragments)$RG[oleft$subjectHits], 
            cellNames))
        oright$subjectHits <- as.integer(BiocGenerics::match(mcols(fragments)$RG[oright$subjectHits], 
            cellNames))
        remove(fragments)
        # sub-window x cell matrix of fragment-end counts (duplicate i/j pairs
        # are summed by sparseMatrix).
        mat <- Matrix::sparseMatrix(i = c(oleft$queryHits, oright$queryHits), 
            j = c(oleft$subjectHits, oright$subjectHits), x = rep(1, 
                nrow(oleft) + nrow(oright)), dims = c(length(rangesx), 
                length(cellNames)))
        colnames(mat) <- cellNames
        windowSummary <- GRanges()
        countSummary <- matrix(nrow = length(unique(mcols(windowsx)$name)), 
            ncol = ncol(mat))
        rownames(countSummary) <- unique(mcols(windowsx)$name)
        colnames(countSummary) <- cellNames
        # Collapse the sub-windows sharing a name into one window: counts per
        # effective base pair plus length-weighted GC / AT / N fractions.
        for (y in seq_len(nrow(countSummary))) {
            idx <- which(mcols(windowsx)$name == rownames(countSummary)[y])
            wx <- windowsx[idx]
            wo <- GRanges(mcols(wx)$wSeq, ranges = IRanges(mcols(wx)$wStart, 
                mcols(wx)$wEnd))[1, ]
            mcols(wo)$name <- mcols(wx)$name[1]
            mcols(wo)$effectiveLength <- sum(width(wx))
            mcols(wo)$percentEffectiveLength <- 100 * sum(width(wx))/width(wo)
            mcols(wo)$GC <- sum(mcols(wx)$GC * width(wx))/width(wo)
            mcols(wo)$AT <- sum(mcols(wx)$AT * width(wx))/width(wo)
            mcols(wo)$N <- sum(mcols(wx)$N * width(wx))/width(wo)
            countSummary[y, ] <- Matrix::colSums(mat[idx, , drop = FALSE])/sum(width(wx))
            windowSummary <- c(windowSummary, wo)
        }
        seqlevels(windowSummary) <- uniqueChr
        SummarizedExperiment::SummarizedExperiment(assays = SimpleList(counts = countSummary), 
            rowData = windowSummary)
    }, threads = 1) %>% Reduce("rbind", .)
    .messageDiffTime("Filtering Low Quality Windows", tstart)
    # Keep windows with an N fraction below 0.001 and >= 90% effective length.
    seWindows <- seWindows[which(mcols(seWindows)$N < 0.001)]
    seWindows <- seWindows[rowData(seWindows)$percentEffectiveLength >= 
        90]
    .messageDiffTime("Normalizing GC-Bias", tstart)
    k <- 25
    # Order windows by GC so the rolling mean compares each window with its
    # GC neighbours (`.centerRollMean` is presumably a centred rolling mean of
    # width k -- confirm against its definition).
    seWindows <- seWindows[order(mcols(seWindows)$GC)]
    assays(seWindows)$log2GCNorm <- log2((assays(seWindows)$counts + 
        1e-05)/(apply(assays(seWindows)$counts, 2, function(x) .centerRollMean(x, 
        k)) + 1e-05))
    if (normByNeighbors) {
        .messageDiffTime("Normalizing Coverage-Bias using Neighbors", 
            tstart)
        totalCounts <- colSums(assays(seWindows)$counts * rowData(seWindows)$effectiveLength)
        seWindows <- seWindows[, order(totalCounts)]
        assays(seWindows)$log2GCNorm <- assays(seWindows)$log2GCNorm - 
            t(apply(assays(seWindows)$log2GCNorm, 1, function(y) .centerRollMean(y, 
                floor(0.025 * ncol(seWindows)))))
        # Restore the original cell order.
        seWindows <- seWindows[, cellNames]
    }
    .messageDiffTime("Smoothing CNV Scores", tstart)
    seWindows <- sort(sortSeqlevels(seWindows))
    # NOTE(review): the rolling mean runs down the full genome-sorted row
    # order, so it is not reset at chromosome boundaries -- confirm that this
    # is intended.
    temp_smooth<-apply(assays(seWindows)$log2GCNorm, 
            2, function(y) .centerRollMean(y, ceiling((windowSize/stepSize)/2)))
    assays(seWindows,withDimnames=FALSE)$log2GCSmooth<-temp_smooth
    windows <- lapply(split(rowRanges(seWindows), seqnames(seWindows)), 
        function(x) {
            mcols(x)$idx <- seq_along(x)
            x
        }) %>% Reduce("c", .) %>% sortSeqlevels %>% sort
    rowRanges(seWindows) <- windows[rownames(seWindows)]
    .messageDiffTime("Adding to Arrow Files", tstart)
    dfParams <- data.frame(windowSize = windowSize, stepSize = stepSize, excludeChr = c("chrM","chrY"),
         stringsAsFactors = FALSE)
    # `idx` is only a placeholder here; it is replaced by a per-chromosome
    # running index in the by() call below.
    featureDF <- data.frame(seqnames = paste0(seqnames(seWindows)), 
        idx = seq_len(nrow(seWindows)), start = start(seWindows), 
        end = end(seWindows), name = mcols(seWindows)$name, GC = mcols(seWindows)$GC, 
        effectiveLength = mcols(seWindows)$effectiveLength, stringsAsFactors = FALSE)
    # Regroup rows by chromosome (in by()'s factor-level order) and number the
    # windows 1..n within each chromosome.
    featureDF <- do.call(
      rbind.data.frame,
      by(featureDF, featureDF$seqnames, function(df) { df$idx <- seq_len(nrow(df)); df; })
    )
    rownames(featureDF)<-featureDF$name
    o <- .initializeMat(ArrowFile = ArrowFile, Group = "Use_CNVMatrix_10MB_GCsmooth", 
        Class = "Double", cellNames = cellNames, params = dfParams, 
        featureDF = featureDF, force = force)
    uniqueChr <- unique(paste0(seqnames(seWindows)))
    # by() may have changed the row order of featureDF relative to seWindows.
    # Bring seWindows into the same row order before copying names/metadata
    # across, otherwise rows would be mislabelled. The order of windows within
    # a chromosome is unaffected, so the data written below is unchanged.
    seWindows <- seWindows[match(featureDF$name, mcols(seWindows)$name), cellNames]
    rownames(seWindows)<-rownames(featureDF)
    rowData(seWindows)<-featureDF
    for (x in seq_along(uniqueChr)) {
        .messageDiffTime(sprintf("Writing CNV Matrix for %s (%s of %s)", 
            uniqueChr[x], x, length(uniqueChr)), tstart)
         o <- .addMatToArrow(mat = Matrix::Matrix(assays(seWindows[BiocGenerics::which(seqnames(seWindows) == 
            uniqueChr[x])])$log2GCSmooth, sparse = TRUE), ArrowFile = ArrowFile, 
            Group = paste0("Use_CNVMatrix_10MB_GCsmooth/", uniqueChr[x]), binarize = FALSE)
        gc()
    }
    return(ArrowFile)
}

In [None]:
# Window geometry for the CNV matrix: 10 Mb windows moved in 2 Mb steps.
# .addCNVMatrix() reads `windowSize` and `stepSize` from this environment, so
# define them once and pass the same objects to addCNVMatrix() rather than
# repeating the literals (which could silently drift apart).
windowSize <- 10e+06
stepSize <- 20e+05

out <- addCNVMatrix(
    input = getArrowFiles(proj),
    chromSizes = getChromSizes(proj),
    blacklist = getBlacklist(proj),
    genome = getGenome(proj),
    force = TRUE,
    windowSize = windowSize,
    stepSize = stepSize,
    threads = 20
)

In [None]:
# Plot selected windows of the 10 Mb CNV-ATAC matrix on the embedding.
proj <- addImputeWeights(ArchRProj = proj)

# Features to overlay: here the window named "w21".
markerGenes <- "w21"

# Embedding coloured by the smoothed CNV score, using the impute weights.
plotList <- plotEmbedding(
    ArchRProj = proj,
    colorBy = "Use_CNVMatrix_10MB_GCsmooth",
    name = markerGenes,
    pal = paletteContinuous(set = "coolwarm"),
    imputeWeights = getImputeWeights(proj)
)
plotList


In [None]:
# Persist the project, including the impute weights added above.
saveArchRProject(ArchRProj = proj)