In [1]:
library(ArchR)
library(tidyverse)
library(BSgenome.Hsapiens.UCSC.hg38)
library(SingleCellExperiment)
library(anndata)
# library(sceasy)
# library(reticulate)
# use_condaenv('cellpymc')
# loompy <- reticulate::import('loompy')


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _  \     
         /  ^  \    |  |_) 

In [2]:
# Switch the R working directory to the ArchR analysis folder,
# then print the current directory to confirm the change.
setwd(dir = '/nfs/team205/heart/anndata_objects/8regions/ArchR')
getwd()

In [3]:
# ArchR requires a default genome (and default thread count) to be set
# before a project is used; select hg38 as the default genome here.
addArchRGenome(genome = "hg38")

Setting default genome to Hg38.



In [4]:
# Set the default number of parallel threads used by ArchR to 10.
addArchRThreads(threads = 10) 

Setting default number of Parallel threads to 10.



# Read in ArchR project

In [5]:
# Directory holding the previously saved ArchR project.
archr_project_path <- '/nfs/team205/heart/anndata_objects/8regions/ArchR/project_output'

# Restore the project from that directory (logo display switched off),
# then show the project summary.
proj <- loadArchRProject(
    path = archr_project_path,
    showLogo = FALSE
)
proj

Successfully loaded ArchRProject!


           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _  \     
         /  ^  \    |  |_)  |    |  ,----'|  |__|  | |  |_)  |    
        /  /_\  \   |      /     |  |     |   __   | |      /     
       /  _____  \  |  |\  \\___ |  `----.|  |  |  | |  |\  \\___.
      /__/     \__\ | _| `._____| \______||__|  |__| | _| `._____|
    



class: ArchRProject 
outputDirectory: /nfs/team205/heart/anndata_objects/8regions/ArchR/project_output 
samples(47): HCAHeart9508627_HCAHeart9508819
  HCAHeart9508628_HCAHeart9508820 ...
  HCAHeartST13180618_HCAHeartST13177115
  HCAHeartST13180619_HCAHeartST13177116
sampleColData names(1): ArrowFiles
cellColData names(52): Sample TSSEnrichment ... Gex_MitoRatio
  Gex_RiboRatio
numberOfCells(1): 139835
medianTSS(1): 8.699
medianFrags(1): 9459

# Prepare Peak annotation

In [6]:
# Retrieve the peak annotation matches stored in the ArchR project
# and display a summary of the returned object.
peakmeta <- getMatches(ArchRProj = proj)
peakmeta

class: RangedSummarizedExperiment 
dim: 429828 870 
metadata(0):
assays(1): matches
rownames(429828): vCM3_stressed LYVE1+MP_cycling ... vCM3_stressed FB3
rowData names(13): score replicateScoreQuantile ... idx N
colnames(870): TFAP2B_1 TFAP2D_2 ... TBX18_869 TBX22_870
colData names(0):

In [7]:
# Extract the peak-by-TF-motif match matrix (logical entries) from the
# 'matches' assay.
peak_motif_mtx <- assays(peakmeta)[['matches']]

# Label every row with its genomic coordinates, formatted as "<chr>_<start>-<end>".
peak_ranges <- rowRanges(peakmeta)
rownames(peak_motif_mtx) <- paste0(
    seqnames(peak_ranges), "_", start(peak_ranges), "-", end(peak_ranges)
)

# Preview the top-left corner of the matrix.
peak_motif_mtx[1:5, 1:5]

5 x 5 sparse Matrix of class "lgCMatrix"
                   TFAP2B_1 TFAP2D_2 TFAP2C_3 TFAP2E_4 TFAP2A_5
chr1_794932-795432        .        .        .        .        .
chr1_817104-817604        .        .        .        .        .
chr1_818775-819275        .        .        .        .        .
chr1_819697-820197        .        .        .        .        .
chr1_821364-821864        .        .        .        .        .

# Read in pre-calculated peak2gene dataframe

In [8]:
# Count how many entries of the project's `cell_type` column carry each label.
table(proj$cell_type)


                 Adipocyte       Atrial Cardiomyocyte 
                      1374                      14163 
          Endothelial cell                 Fibroblast 
                     16104                      42479 
Lymphatic Endothelial cell                   Lymphoid 
                       334                       4399 
                 Mast cell           Mesothelial cell 
                       274                         62 
                Mural cell                    Myeloid 
                     10121                      13660 
               Neural cell  Ventricular Cardiomyocyte 
                      1581                      35284 

In [9]:
# Cell type whose peak-to-gene links are analysed below; it is also used
# (with its first space removed) to build input and output file names.
celltype_to_analyse <- 'Atrial Cardiomyocyte'

In [10]:
# Path of the pre-computed peak2gene table for the selected cell type.
# NOTE(review): sub() removes only the FIRST space of the cell-type name, so
# names with several spaces keep the remaining ones -- confirm this matches
# how the CSV files were named when they were written.
p2gene_csv_path <- paste0(
    '/nfs/team205/heart/anndata_objects/8regions/ArchR/project_output/csv/peak2gene/',
    sub(' ','',celltype_to_analyse),
    '.csv'
)

# Load the table, then report its dimensions and first rows.
p2geneDF <- read.csv(p2gene_csv_path)
dim(p2geneDF)
head(p2geneDF)

Unnamed: 0_level_0,idxATAC,idxRNA,Correlation,FDR,VarQATAC,VarQRNA,geneName,peakName
Unnamed: 0_level_1,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
1,1,10,-0.02040158,0.7933998,0.02826247,0.1150766,AC114498.1,chr1_794932-795432
2,2,10,0.033200608,0.6492725,0.49832491,0.1150766,AC114498.1,chr1_817104-817604
3,3,10,-0.047132556,0.4919864,0.68155634,0.1150766,AC114498.1,chr1_818775-819275
4,4,10,0.001703915,0.9847709,0.39796384,0.1150766,AC114498.1,chr1_819697-820197
5,5,10,0.027802571,0.7105859,0.51944266,0.1150766,AC114498.1,chr1_821364-821864
6,6,10,0.058411057,0.3736228,0.55871186,0.1150766,AC114498.1,chr1_825517-826017


# Generate "TF - possible downstream genes" dictionary

In [11]:
# Filtering thresholds for the peak-to-gene links.
# Correlation cutoff is negative on purpose: only links whose correlation is
# below this value (negatively correlated links) are kept.
p2g_corr_thresh <- -0.2
# Links are kept only when their FDR is below this value.
p2g_FDR_thresh <- 1e-04

In [12]:
# Build the "TF -> candidate downstream genes" dictionary: for every TF, collect
# the genes of the filtered peak-to-gene links whose peak contains a motif of
# that TF. Gene names are kept once per qualifying link (no de-duplication).
network <- list()

# Keep only negatively correlated links (Correlation below the threshold) that
# also pass the FDR cutoff. `which()` drops rows where either value is NA.
passes_filter <- which(
    p2geneDF[["Correlation"]] < p2g_corr_thresh &
        p2geneDF[["FDR"]] < p2g_FDR_thresh
)
df <- p2geneDF[passes_filter, ]

# TF name = text before the first "_" of each motif column name
# (e.g. "TFAP2B_1" -> "TFAP2B").
TFs <- vapply(
    strsplit(colnames(peak_motif_mtx), "_", fixed = TRUE),
    function(parts) parts[1],
    character(1)
)

peak_names <- rownames(peak_motif_mtx)

# Iterate over each distinct TF name once. Several motif columns can reduce to
# the same TF name; in that case a peak counts as a TF peak when ANY of those
# columns matches. (Indexing several columns at once and calling `which()` on
# the result yields linear indices, which do not map back to row names.)
for (tf in unique(TFs[!is.na(TFs)])) {
    motif_cols <- which(TFs == tf)
    n_motif_hits <- Matrix::rowSums(peak_motif_mtx[, motif_cols, drop = FALSE])
    tf_peaks <- peak_names[which(n_motif_hits > 0)]
    network[[tf]] <- df[["geneName"]][df[["peakName"]] %in% tf_peaks]
}

In [13]:
# Number of entries in `network`, i.e. how many TF names ended up as keys.
length(network)

In [14]:
# Number of candidate downstream genes recorded for each TF, in the order of
# the entries of `network`. `lengths()` replaces growing a vector with `c()`
# inside a loop; names are dropped so the result stays a plain unnamed vector.
# For an empty `network` the result is integer(0).
length_vector <- unname(lengths(network))

In [15]:
library(jsonlite)

# Output file name encodes the cell type (first space removed) and the
# correlation threshold used for filtering.
network_json_path <- paste0(
    '/nfs/team205/heart/anndata_objects/8regions/ArchR/project_output/TF_downstreamGene/',
    sub(' ','',celltype_to_analyse),
    '_p2gCorr-negative-',
    p2g_corr_thresh,
    '.json'
)

# Serialise the TF -> genes dictionary as JSON at that path.
write_json(network, network_json_path)


Attaching package: ‘jsonlite’


The following object is masked from ‘package:purrr’:

    flatten


