In [None]:
library(BiocManager)
library(BSgenome.Hsapiens.UCSC.hg38)
library(ArchR)
library(ggplot2)
library(TFBSTools)
library(Seurat)
library(ggplot2)
library(dplyr)
library(harmony)
library(Signac)
library(BSgenome.Hsapiens.UCSC.hg38)
library(JASPAR2018)
library(edgeR)
library(sctransform)
# Load the hg38 gene and genome annotation datasets and keep them under the
# short aliases `geneAnno` / `genomeAnno`.
data("geneAnnoHg38", "genomeAnnoHg38")
genomeAnno <- genomeAnnoHg38
geneAnno <- geneAnnoHg38

# Number of threads ArchR may use for its parallel steps.
addArchRThreads(threads = 24)


# Optional: copy every function defined in the ArchR namespace (including
# non-exported ones) into the current environment so internal helpers can be
# called without the `ArchR:::` prefix.
# Original author's note: maybe don't run this when getting CNV calls.
#
# Functions are looked up with get()/assign() instead of building and
# evaluating source text, so names that are not syntactically valid
# identifiers are handled too, and any failure is reported instead of being
# silently discarded.
archr_ns <- asNamespace("ArchR")
target_env <- environment()
fn <- unclass(lsf.str(envir = archr_ns, all = TRUE))
for (i in seq_along(fn)) {
  tryCatch({
    assign(fn[i], get(fn[i], envir = archr_ns), envir = target_env)
  }, error = function(x) {
    # Best effort: keep going, but say which function could not be copied.
    message("Could not copy ArchR:::", fn[i], ": ", conditionMessage(x))
  })
}

### Creating the scATAC LSI and plotting subclone clusters in the scATAC LSI-based UMAP

In [None]:
# Load the saved ArchR project for the sample being analysed.
proj <- loadArchRProject(path = 'ArchR project for GBM39/45')


In [None]:
# Read the barcode-to-subclone matching table from the scatools package
# (tab-separated, first row is the header), then pull the per-cell metadata
# of the loaded project.
subclone_metadata <- read.table(
  file = 'TableS11/TableS12',
  header = TRUE,
  sep = '\t'
)
coldata <- getCellColData(ArchRProj = proj)

In [None]:
# Keep only the cells whose name appears in the `newbarcode` column of the
# subclone table, i.e. cells that have a subclone call.
has_subclone_call <- rownames(coldata) %in% subclone_metadata$newbarcode
proj1 <- subsetCells(
  ArchRProj = proj,
  cellNames = rownames(coldata[has_subclone_call, ])
)


In [None]:
# Dimensionality reduction on the tile matrix (2 LSI iterations), overwriting
# any existing result.
proj1 <- addIterativeLSI(
  ArchRProj = proj1,
  useMatrix = "TileMatrix",
  iterations = 2,
  force = TRUE
)

# UMAP embedding computed from the LSI dimensions.
proj1 <- addUMAP(ArchRProj = proj1, reducedDims = "IterativeLSI", force = TRUE)

# Cluster the cells in LSI space at resolution 0.2.
proj1 <- addClusters(
  input = proj1,
  reducedDims = "IterativeLSI",
  resolution = 0.2,
  force = TRUE
)




In [None]:
# Per-cell metadata of the subsetted project.
coldata1 <- getCellColData(ArchRProj = proj1)


In [None]:
# Index the subclone table by barcode so it can be looked up by cell name.
row.names(subclone_metadata) <- subclone_metadata$newbarcode

In [None]:
# Look up each cell's subclone label, in the same order as the rows of
# `coldata1`.
matched_subclone_rows <- subclone_metadata[rownames(coldata1), ]
coldata1$subclone_clusters <- matched_subclone_rows$clusters


In [None]:
# Store the subclone label of every cell as the `subcloneclusters` metadata
# column of the project.
proj1$subcloneclusters <- coldata1$subclone_clusters

In [None]:
# Persist the subclone-annotated project to disk.
# Fixes the misspelled function name `saveArchRProjecT` (R is case-sensitive,
# so the original call failed with "could not find function").
# NOTE(review): the next section loads from 'path to save archr project';
# make sure both placeholder paths are replaced by the same directory.
saveArchRProject(
  ArchRProj = proj1,
  outputDirectory = 'path to saved archr project'
)



### Finding differential peaks between subclones

In [None]:
# Reload the subclone-annotated project saved in the previous section.
proj <- loadArchRProject(path = 'path to save archr project')

In [None]:
# Build a reproducible peak set for the project, grouping cells by "Clusters".
# 1) Group coverages per cluster.
proj <- addGroupCoverages(
  ArchRProj = proj,
  groupBy = "Clusters",
  force = TRUE
)
# 2) Call reproducible peaks with MACS2 (~5-10 minutes).
proj <- addReproduciblePeakSet(
  ArchRProj = proj,
  groupBy = "Clusters",
  force = TRUE
)
# 3) Add the peak matrix for the new peak set.
proj <- addPeakMatrix(ArchRProj = proj)



In [None]:
# Marker peaks per subclone: Wilcoxon test on the peak matrix, with cells
# grouped by `subcloneclusters` and TSS enrichment and log10(nFrags) used as
# bias terms.
markersPeaks <- getMarkerFeatures(
  ArchRProj = proj,
  groupBy = "subcloneclusters",
  useMatrix = "PeakMatrix",
  testMethod = "wilcoxon",
  bias = c("TSSEnrichment", "log10(nFrags)")
)

In [None]:
# Heatmap of the marker peaks that pass FDR <= 0.1 and Log2FC >= 0.5, drawn
# transposed. The heatmap object is only stored here, not drawn.
heatmapPeaks <- markerHeatmap(
  seMarker = markersPeaks,
  transpose = TRUE,
  cutOff = "FDR <= 0.1 & Log2FC >= 0.5"
)

In [None]:
# Current peak set of the project as a plain data.frame; it is merged by
# seqnames/start/end further down.
peakset_info <- data.frame(getPeakSet(ArchRProj = proj))


### Subsetting differential peaks that are in copy neutral regions to perform motif enrichment

In [None]:
# Read the CNV-annotated peak table and drop the annotation columns that are
# not used downstream.
annotated_peaks <- read.csv('GBM45/39_peaks_cnv_annotated.csv')

# Columns to discard. Names absent from the table are ignored, as they were
# with the original one-by-one `<- NULL` assignments (which also listed
# `width.cnv_bins` twice).
drop_cols <- c(
  'GroupReplicate', 'distToGeneStart', 'peakType', 'distToTSS', 'idx', 'N',
  'nearestTSS', 'score', 'replicateScoreQuantile', 'groupScoreQuantile',
  'Reproducibility',
  'allele_state_A', 'allele_state_B', 'allele_state_C',
  'mhf_jointseg_A', 'mhf_jointseg_B', 'mhf_jointseg_C',
  'bin_idx', 'GC', 'bin_id',
  'strand.peaks', 'width.peaks',
  'seqnames.cnv_bins', 'start.cnv_bins', 'end.cnv_bins',
  'width.cnv_bins', 'strand.cnv_bins'
)
annotated_peaks <- annotated_peaks[
  ,
  !(names(annotated_peaks) %in% drop_cols),
  drop = FALSE
]





In [None]:
# Keep the coordinates of the peaks whose CNV state is 'neutral' in both
# subclone A and subclone B.
# `%in%` is used instead of `==` so that a missing (NA) CNV state counts as
# "not neutral"; with `==` an NA in the mask would add all-NA rows to the
# result.
is_copy_neutral <- (annotated_peaks$cnv_state_A %in% 'neutral') &
  (annotated_peaks$cnv_state_B %in% 'neutral')
annotated_peaks_trim <- annotated_peaks[
  is_copy_neutral,
  c('seqnames.peaks', 'start.peaks', 'end.peaks')
]
# Rename to the plain coordinate names used as merge keys against the
# project's peak set.
names(annotated_peaks_trim) <- c('seqnames', 'start', 'end')
head(annotated_peaks_trim)


In [None]:
# Restrict the project's peak set to the copy-neutral peaks (matched on
# seqnames/start/end), install it as the new peak set and rebuild the peak
# matrix.
req_peaks <- merge(
  x = peakset_info,
  y = annotated_peaks_trim,
  by = c('seqnames', 'start', 'end')
)
proj <- addPeakSet(
  ArchRProj = proj,
  peakSet = GRanges(req_peaks),
  force = TRUE
)
proj <- addPeakMatrix(ArchRProj = proj, force = TRUE)

In [None]:
# Pairwise differential test on the peak matrix: subclone "A" against
# subclone "B" as background (Wilcoxon, with TSS enrichment and log10(nFrags)
# as bias terms).
markerTest <- getMarkerFeatures(
  ArchRProj = proj,
  groupBy = "subcloneclusters",
  useGroups = "A",
  bgdGroups = "B",
  useMatrix = "PeakMatrix",
  bias = c("TSSEnrichment", "log10(nFrags)"),
  testMethod = "wilcoxon"
)

### Plotting the motif enrichment of Fetal vs Adult clones from GBM39 and GBM45

In [None]:
# Rank plot of motif enrichment for the fetal-like clone.
df1 <- read.csv('fetal_significant_sorted.csv')

# One row per TF: display name, the `av` column used as the plotted value
# (OR), and the GBM39 clone A score kept alongside as `rank1`.
df <- data.frame(TF = df1$modified_names, OR = df1$av)
df$rank1 <- df1$gbm39_cloneA_Score
df <- df[order(df$OR, decreasing = TRUE), ]
df$rank <- seq_len(nrow(df))

# Label the top-ranked TFs. Capped at the number of rows so that a table with
# fewer than 10 TFs does not produce NA rows (and empty labels).
n_labels <- min(10, nrow(df))

ggUp <- ggplot(df, aes(rank, OR, color = OR)) +
  geom_point(size = 1) +
  ggrepel::geom_label_repel(
    data = df[rev(seq_len(n_labels)), ],
    aes(x = rank, y = OR, label = TF),
    size = 1.5,
    nudge_x = 1,
    color = "black"
  ) +
  theme_ArchR() +
  ylab("Motif Enrichment OR") +
  xlab("Rank Sorted TFs Enriched") +
  scale_color_gradientn(colors = paletteContinuous(set = "comet"))

ggUp



In [None]:
# Rank plot of motif enrichment for the adult-like clone.
df1 <- read.csv('adult_significant_sorted.csv')

# One row per TF: display name, the `av` column used as the plotted value
# (OR), and the GBM39 clone A score kept alongside as `rank1`.
df <- data.frame(TF = df1$modified_names, OR = df1$av)
df$rank1 <- df1$gbm39_cloneA_Score
df <- df[order(df$OR, decreasing = TRUE), ]
df$rank <- seq_len(nrow(df))

# Label the top-ranked TFs. Capped at the number of rows so that a table with
# fewer than 15 TFs does not produce NA rows (and empty labels).
n_labels <- min(15, nrow(df))

ggUp <- ggplot(df, aes(rank, OR, color = OR)) +
  geom_point(size = 0.5) +
  ggrepel::geom_label_repel(
    data = df[rev(seq_len(n_labels)), ],
    aes(x = rank, y = OR, label = TF),
    size = 1.5,
    nudge_x = 10,
    color = "black"
  ) +
  theme_ArchR() +
  ylab("Motif Enrichment OR") +
  xlab("Rank Sorted TFs Enriched") +
  scale_color_gradientn(colors = paletteContinuous(set = "comet"))

# Write the figure as a PDF through ArchR's plotPDF for the project `proj`.
plotPDF(
  ggUp,
  name = "SUBCLONAL_ADULT_differentialmotifenrichment_NEW",
  width = 6,
  height = 6,
  ArchRProj = proj,
  addDOC = FALSE
)

ggUp



### Creating peak sets for the subclones in GBM39 and GBM45

In [None]:
# Load the project that will be duplicated, one copy per subclone.
proj_new <- loadArchRProject(path = 'GBM 39/45 ArchrProject')


In [None]:
# Save two copies of the project, one to work on per subclone (A and B).
saveArchRProject(
  ArchRProj = proj_new,
  outputDirectory = ' Copy of Archr project for Clone A'
)
saveArchRProject(
  ArchRProj = proj_new,
  outputDirectory = ' Copy of Archr project for Clone B'
)


In [None]:
# Clone A: load its project copy, keep only the cells labelled 'A' in
# `subcloneclusters`, then call a clone-specific peak set.
proj_cloneA <- loadArchRProject(path = 'GBM39/45 cloneA ArchR Project Path')
coldata <- getCellColData(ArchRProj = proj_cloneA)

is_clone_a <- coldata$subcloneclusters == 'A'
proj_cloneA <- subsetCells(
  ArchRProj = proj_cloneA,
  cellNames = rownames(coldata[is_clone_a, ])
)
proj_cloneA

# Group coverages per sample.
proj_cloneA <- addGroupCoverages(
  ArchRProj = proj_cloneA,
  groupBy = "Sample",
  force = TRUE
)
# Reproducible peaks with MACS2 (~5-10 minutes).
proj_cloneA <- addReproduciblePeakSet(
  ArchRProj = proj_cloneA,
  groupBy = "Sample",
  force = TRUE
)
# Peak matrix for the new peak set, then save the project.
proj_cloneA <- addPeakMatrix(ArchRProj = proj_cloneA)
saveArchRProject(ArchRProj = proj_cloneA)

In [None]:
# Clone B: load its project copy, keep only the cells labelled 'B' in
# `subcloneclusters`, then call a clone-specific peak set.
proj_cloneB <- loadArchRProject(path = 'GBM39/45 cloneB ArchR Project Path')
coldata <- getCellColData(ArchRProj = proj_cloneB)

is_clone_b <- coldata$subcloneclusters == 'B'
proj_cloneB <- subsetCells(
  ArchRProj = proj_cloneB,
  cellNames = rownames(coldata[is_clone_b, ])
)
proj_cloneB

# Group coverages per sample.
proj_cloneB <- addGroupCoverages(
  ArchRProj = proj_cloneB,
  groupBy = "Sample",
  force = TRUE
)
# Reproducible peaks with MACS2 (~5-10 minutes).
proj_cloneB <- addReproduciblePeakSet(
  ArchRProj = proj_cloneB,
  groupBy = "Sample",
  force = TRUE
)
# Peak matrix for the new peak set, then save the project.
proj_cloneB <- addPeakMatrix(ArchRProj = proj_cloneB)

saveArchRProject(ArchRProj = proj_cloneB)


### Plotting the MYCL genome browser plot

In [None]:
# Load the GBM45 project and print its summary.
proj_data <- loadArchRProject(path = 'GBM45 ArchR Project')
proj_data

In [None]:
# Genome browser track of chr1:39888399-39908412 (the MYCL plot named in the
# section title), with one coverage track per subclone (A and B).
# Uses `proj_data`, the GBM45 project loaded in the cell directly above. The
# original passed `proj`, a variable left over from an earlier section, so
# the freshly loaded project was never used.
# NOTE(review): assumes `proj_data` carries the `subcloneclusters` column;
# confirm before running.
p <- plotBrowserTrack(
  ArchRProj = proj_data,
  groupBy = "subcloneclusters",
  useGroups = c('A', 'B'),
  pal = c('A' = '#E69F00', 'B' = '#56B4E9'),
  region = GRanges('chr1:39888399-39908412'),
  loops = NULL
)

# Draw the returned grid object on a fresh page.
grid::grid.newpage()
grid::grid.draw(p)


