In [None]:
library(BiocManager)
library(BSgenome.Hsapiens.UCSC.hg38)
library(ArchR)
library(ggplot2)
library(TFBSTools)
library(Seurat)
library(ggplot2)
library(dplyr)
library(harmony)
library(Signac)
library(BSgenome.Hsapiens.UCSC.hg38)
library(JASPAR2018)
library(edgeR)
library(sctransform)
# Load the geneAnnoHg38 / genomeAnnoHg38 datasets and keep them under shorter
# aliases.
data("geneAnnoHg38")
data("genomeAnnoHg38")
geneAnno <- geneAnnoHg38
genomeAnno <- genomeAnnoHg38
# Ask ArchR for 24 threads.
addArchRThreads(24)


# Copy every function found in the ArchR namespace (all.names = TRUE also
# lists the dot-prefixed ones) into the current environment, so that the
# helpers used further down (.getAssay, .centerRollMean, .groupMeans) can be
# called by bare name.
# Original author's note: maybe don't run this when getting CNV calls.
#
# get()/assign() is used instead of eval(parse(text = ...)): building code
# from strings breaks on function names that are not syntactically valid
# identifiers, and the previous empty error handler hid those failures.
fn <- as.character(lsf.str(envir = asNamespace("ArchR"), all.names = TRUE))
for (i in seq_along(fn)) {
  assign(fn[i], get(fn[i], envir = asNamespace("ArchR")))
}

In [None]:
# Load the stored ArchR project from the given path into `proj`.
proj <- loadArchRProject(path = "Path for the stored ArchR Project")


In [None]:
# Read in the motif instances file (comma-separated, first row is the header)
final_motif_list <- read.table(
  file = 'Motif instances Cleaned/Uncleaned',
  header = TRUE,
  sep = ','
)

# Build one GRanges object per motif group, stored under the group's name
coordinate_columns <- c('seqnames', 'start', 'end')
final_granges_list1 <- c()
for (motif_group in unique(final_motif_list$group_name)) {
  print(motif_group)
  in_group <- final_motif_list$group_name == motif_group
  group_regions <- final_motif_list[in_group, coordinate_columns]
  # Every instance gets '.' as its strand value
  group_regions$strand <- '.'
  # Discard the row names carried over from the full table
  row.names(group_regions) <- NULL
  final_granges_list1[[motif_group]] <- GRanges(group_regions)
}

# Attach the per-group regions to the project as a peak annotation
proj <- addPeakAnnotations(
  ArchRProj = proj,
  regions = final_granges_list1,
  name = 'vierstra_model_cleaned',
  force = TRUE
)


In [None]:
# Extracting all motif instances from the cleaned annotation
motifPositions <- getPositions(proj, name = 'vierstra_model_cleaned')
motifs <- names(motifPositions)
# Select motifs by exact name. Matching each name as a regular expression
# (grep) returned every position set whose name merely contains the pattern,
# which duplicates entries when one name is a prefix of another and
# misbehaves when a name contains regex metacharacters.
# To inspect a subset, replace `motifs` above with the names of interest.
markerMotifs <- motifs[motifs %in% names(motifPositions)]
head(motifPositions[markerMotifs])
                              

In [None]:
# Extracting the footprints per BRCA subtype
# Uses `proj`, the project the 'vierstra_model_cleaned' annotation was added
# to and that motifPositions was read from (this call previously referenced
# `proj2`, which this script never creates).
seFoot_cleaned <- getFootprints(
  ArchRProj = proj,
  positions = motifPositions,
  groupBy = "subtype"
)

In [None]:
# Computing the maximal values of the footprint for the different subtypes
#
# For every assay in seFoot_cleaned (presumably one per motif) this:
#   1. takes the rows whose rowData column 2 is "footprint" / "bias",
#   2. smooths every column with .centerRollMean(x, smoothWindow),
#   3. divides every column by its mean over the flank rows
#      (abs(x) >= flank - flankNorm),
#   4. divides the footprint matrix by the bias matrix,
#   5. averages the columns per colData(...)$Group with .groupMeans, and
#   6. stores the maximum of that group mean for 'Lum', 'BASAL' and 'HER2'.
#
# Reads seFoot_cleaned, the object created by the cleaned getFootprints()
# call. Requires the ArchR internals .getAssay, .centerRollMean and
# .groupMeans to be callable by bare name.
smoothWindow <- 5  # set to NULL to skip smoothing
flank <- 250
flankNorm <- 50

# Row bookkeeping does not depend on the assay, so it is computed once.
rowDF <- SummarizedExperiment::rowData(seFoot_cleaned)
footRows <- BiocGenerics::which(rowDF[, 2] == "footprint")
biasRows <- BiocGenerics::which(rowDF[, 2] == "bias")
footDF <- rowDF[footRows, ]
seFootRows <- seFoot_cleaned[footRows, ]
seBiasRows <- seFoot_cleaned[biasRows, ]
idx <- which(abs(footDF$x) >= flank - flankNorm)
sampleGroups <- SummarizedExperiment::colData(seFoot_cleaned)$Group

cleaned_names <- names(assays(seFoot_cleaned))
cleaned_Luminal_values <- numeric(length(cleaned_names))
cleaned_Basal_values <- numeric(length(cleaned_names))
cleaned_Her2_values <- numeric(length(cleaned_names))

for (i in seq_along(cleaned_names)) {
  name <- cleaned_names[i]
  footMat <- .getAssay(seFootRows, name)
  biasMat <- .getAssay(seBiasRows, name)
  if (!is.null(smoothWindow)) {
    footMat <- apply(footMat, 2, function(x) .centerRollMean(x, smoothWindow))
    biasMat <- apply(biasMat, 2, function(x) .centerRollMean(x, smoothWindow))
  }
  # Normalize Foot and Bias Mat by their mean over the flank rows
  footMat <- t(t(footMat) / colMeans(footMat[idx, , drop = FALSE]))
  biasMat <- t(t(biasMat) / colMeans(biasMat[idx, , drop = FALSE]))
  # Correct the footprint signal by the bias signal
  footMat <- footMat / biasMat
  footMatMean <- .groupMeans(footMat, sampleGroups)
  # Columns are picked by comparing names rather than footMatMean[, "Lum"]
  # so that an absent group still yields max() over nothing (-Inf with a
  # warning) instead of a subscript error.
  groupNames <- colnames(footMatMean)
  cleaned_Luminal_values[i] <- max(footMatMean[, groupNames == 'Lum'])
  cleaned_Basal_values[i] <- max(footMatMean[, groupNames == 'BASAL'])
  cleaned_Her2_values[i] <- max(footMatMean[, groupNames == 'HER2'])
}

In [None]:
# Repeating the whole analysis for uncleaned motifs
#
# Positions and footprints are both taken from `proj`; the positions were
# previously requested from `proj2`, which this script never creates, while
# the footprints already used `proj`.
motifPositions_uncleaned <- getPositions(proj, name = 'vierstra_model_uncleaned')
seFoot <- getFootprints(
  ArchRProj = proj,
  positions = motifPositions_uncleaned,
  groupBy = "subtype"
)

# For every assay in seFoot (presumably one per motif) this:
#   1. takes the rows whose rowData column 2 is "footprint" / "bias",
#   2. smooths every column with .centerRollMean(x, smoothWindow),
#   3. divides every column by its mean over the flank rows
#      (abs(x) >= flank - flankNorm),
#   4. divides the footprint matrix by the bias matrix,
#   5. averages the columns per colData(...)$Group with .groupMeans, and
#   6. stores the maximum of that group mean for 'Lum', 'BASAL' and 'HER2'.
#
# Requires the ArchR internals .getAssay, .centerRollMean and .groupMeans to
# be callable by bare name.
smoothWindow <- 5  # set to NULL to skip smoothing
flank <- 250
flankNorm <- 50

# Row bookkeeping does not depend on the assay, so it is computed once.
rowDF <- SummarizedExperiment::rowData(seFoot)
footRows <- BiocGenerics::which(rowDF[, 2] == "footprint")
biasRows <- BiocGenerics::which(rowDF[, 2] == "bias")
footDF <- rowDF[footRows, ]
seFootRows <- seFoot[footRows, ]
seBiasRows <- seFoot[biasRows, ]
idx <- which(abs(footDF$x) >= flank - flankNorm)
sampleGroups <- SummarizedExperiment::colData(seFoot)$Group

uncleaned_names <- names(assays(seFoot))
uncleaned_Luminal_values <- numeric(length(uncleaned_names))
uncleaned_Basal_values <- numeric(length(uncleaned_names))
uncleaned_Her2_values <- numeric(length(uncleaned_names))

for (i in seq_along(uncleaned_names)) {
  name <- uncleaned_names[i]
  footMat <- .getAssay(seFootRows, name)
  biasMat <- .getAssay(seBiasRows, name)
  if (!is.null(smoothWindow)) {
    footMat <- apply(footMat, 2, function(x) .centerRollMean(x, smoothWindow))
    biasMat <- apply(biasMat, 2, function(x) .centerRollMean(x, smoothWindow))
  }
  # Normalize Foot and Bias Mat by their mean over the flank rows
  footMat <- t(t(footMat) / colMeans(footMat[idx, , drop = FALSE]))
  biasMat <- t(t(biasMat) / colMeans(biasMat[idx, , drop = FALSE]))
  # Correct the footprint signal by the bias signal
  footMat <- footMat / biasMat
  footMatMean <- .groupMeans(footMat, sampleGroups)
  # Columns are picked by comparing names rather than footMatMean[, "Lum"]
  # so that an absent group still yields max() over nothing (-Inf with a
  # warning) instead of a subscript error.
  groupNames <- colnames(footMatMean)
  uncleaned_Luminal_values[i] <- max(footMatMean[, groupNames == 'Lum'])
  uncleaned_Basal_values[i] <- max(footMatMean[, groupNames == 'BASAL'])
  uncleaned_Her2_values[i] <- max(footMatMean[, groupNames == 'HER2'])
}

In [None]:
# Merging the footprint strength per motif and plotting
#
# The two tables are built as data frames so the values stay numeric.
# cbind() of a character and a numeric vector produces a character matrix,
# which forced a numeric -> text -> numeric round trip (and, where strings
# become factors by default, as.numeric() would return level codes).
df_Cleaned_basal <- data.frame(
  cleaned_names = cleaned_names,
  cleaned_Basal_values = cleaned_Basal_values,
  stringsAsFactors = FALSE
)
df_UNCleaned_basal <- data.frame(
  uncleaned_names = uncleaned_names,
  uncleaned_Basal_values = uncleaned_Basal_values,
  stringsAsFactors = FALSE
)
# Keep only the names present in both tables; the key column of the result
# is called 'cleaned_names'.
df_basal <- merge(
  df_Cleaned_basal,
  df_UNCleaned_basal,
  by.x = 'cleaned_names',
  by.y = 'uncleaned_names'
)

# Rows whose cleaned value is lower than the uncleaned one: first the
# dimensions of that subset, then the rows themselves.
cleaned_is_lower <- df_basal$cleaned_Basal_values < df_basal$uncleaned_Basal_values
dim(df_basal[cleaned_is_lower, ])
df_basal[cleaned_is_lower, ]

# plotting: uncleaned (x) against cleaned (y), with geom_abline() at its
# defaults as the reference line
library(ggplot2)
b <- ggplot(df_basal, aes(x = uncleaned_Basal_values, y = cleaned_Basal_values)) +
  geom_point() +
  geom_abline()
b

