Ref:
https://www.archrproject.com/bookdown/creating-an-archrproject-1.html

In [1]:
library(ArchR)
library(tidyverse)
library(BSgenome.Hsapiens.UCSC.hg38)
library(SingleCellExperiment)


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _  \     
         /  ^  \    |  |_) 

In [2]:
getwd()

In [3]:
# before starting a project we must set the ArchRGenome and default threads for parallelization.
# Setting default genome to Hg38.
addArchRGenome("hg38")

Setting default genome to Hg38.



In [4]:
# Setting default number of Parallel threads to 16
addArchRThreads(threads = 1) 

Setting default number of Parallel threads to 1.



# Read in ArchR project

In [5]:
archr_project_path = '/nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output'
proj = loadArchRProject(path = archr_project_path, showLogo = FALSE)
proj

Successfully loaded ArchRProject!


           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _  \     
         /  ^  \    |  |_)  |    |  ,----'|  |__|  | |  |_)  |    
        /  /_\  \   |      /     |  |     |   __   | |      /     
       /  _____  \  |  |\  \\___ |  `----.|  |  |  | |  |\  \\___.
      /__/     \__\ | _| `._____| \______||__|  |__| | _| `._____|
    



class: ArchRProject 
outputDirectory: /nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output 
samples(37): BHF_F_Hea11064670_BHF_F_Hea11031823
  BHF_F_Hea11064671_BHF_F_Hea11031824 ...
  HCAHeartST13386009_HCAHeartST13303419
  HCAHeartST13386010_HCAHeartST13303420
sampleColData names(1): ArrowFiles
cellColData names(63): Sample TSSEnrichment ... Gex_MitoRatio
  Gex_RiboRatio
numberOfCells(1): 167022
medianTSS(1): 11.614
medianFrags(1): 10338.5

# Peak matrix

In [6]:
PeakMatrix = getMatrixFromProject(
             ArchRProj = proj,
              useMatrix = "PeakMatrix",
              useSeqnames = NULL,
              verbose = TRUE,
              binarize = FALSE,
              threads = getArchRThreads(),
              logFile = createLogFile("getMatrixFromProject")
             )

ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-23502ed73aec-Date-2023-03-12_Time-19-51-39.log
If there is an issue, please report to github with logFile!

2023-03-12 20:13:14 : Organizing colData, 21.576 mins elapsed.

2023-03-12 20:13:16 : Organizing rowData, 21.621 mins elapsed.

2023-03-12 20:13:16 : Organizing rowRanges, 21.622 mins elapsed.

2023-03-12 20:13:16 : Organizing Assays (1 of 1), 21.624 mins elapsed.

2023-03-12 20:23:20 : Constructing SummarizedExperiment, 31.689 mins elapsed.

2023-03-12 20:29:42 : Finished Matrix Creation, 38.046 mins elapsed.



In [7]:
# get peak names as ranges
PeakSet = getPeakSet(proj)
df = data.frame(PeakSet)
ranges = paste0(df[['seqnames']],':',df[['start']],'_',df[['end']])
head(ranges)

# add rawnames of PeakMatrix
rownames(PeakMatrix) = ranges
PeakMatrix

# add rawnames of PeakMatrix binarised
# rownames(PeakMatrix_bi) = ranges
# PeakMatrix_bi

class: RangedSummarizedExperiment 
dim: 508040 167022 
metadata(0):
assays(1): PeakMatrix
rownames(508040): chr1:817093_817593 chr1:819818_820318 ...
  chrX:155881064_155881564 chrX:155881618_155882118
rowData names(1): idx
colnames(167022): 7089STDY13216921_BHF_F_Hea13242528#GCAATAGAGTTATGTG-1
  7089STDY13216921_BHF_F_Hea13242528#TGGGCCTAGATGGACA-1 ...
  BHF_F_Hea11933675_BHF_F_Hea11596628#GTACTAATCGTTTCCA-1
  7089STDY13216923_BHF_F_Hea13242530#GCTGGTTCAAATTCGT-1
colData names(63): BlacklistRatio Gex_MitoRatio ... ReadsInPeaks FRIP

## Save as matrix

In [8]:
# save as cell-by-peak
writeMM(obj = assay(PeakMatrix), file="/nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/PeakMatrix/matrix.mtx")

NULL

In [9]:
write.table(rownames(PeakMatrix), file="/nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/PeakMatrix/features.tsv",
           col.names = FALSE, row.names = FALSE, quote = FALSE, sep='\t')

In [10]:
# row.names = TRUE to put the features in the 2nd column (for scanpy.read_10x_mtx later we use)
write.table(colnames(PeakMatrix), file="/nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/PeakMatrix/barcodes.tsv",
           col.names = FALSE, row.names = TRUE, quote = FALSE, sep='\t')

In [None]:
colnames(PeakMatrix)

In [None]:
# at farm
gzip -v /nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/PeakMatrix/matrix.mtx
# gzip -v /nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/PeakMatrix/matrix_binarised.mtx
gzip -v /nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/PeakMatrix/features.tsv
gzip -v /nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/PeakMatrix/barcodes.tsv

# GeneScoreMatrix

In [6]:
GeneScoreMatrix = getMatrixFromProject(
             ArchRProj = proj,
              useMatrix = "GeneScoreMatrix",
              useSeqnames = NULL,
              verbose = TRUE,
              binarize = FALSE,
              threads = getArchRThreads(),
              logFile = createLogFile("getMatrixFromProject")
             )

ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-2d21e6a568ca7-Date-2023-11-05_Time-22-34-07.log
If there is an issue, please report to github with logFile!

2023-11-05 23:01:34 : Organizing colData, 27.446 mins elapsed.

2023-11-05 23:01:38 : Organizing rowData, 27.515 mins elapsed.

2023-11-05 23:01:38 : Organizing rowRanges, 27.515 mins elapsed.

2023-11-05 23:01:38 : Organizing Assays (1 of 1), 27.516 mins elapsed.

2023-11-05 23:15:40 : Constructing SummarizedExperiment, 41.545 mins elapsed.

2023-11-05 23:15:45 : Finished Matrix Creation, 41.624 mins elapsed.



In [7]:
GeneScoreMatrix

class: SummarizedExperiment 
dim: 24919 167022 
metadata(0):
assays(1): GeneScoreMatrix
rownames: NULL
rowData names(6): seqnames start ... name idx
colnames(167022): 7089STDY13216921_BHF_F_Hea13242528#GCAATAGAGTTATGTG-1
  7089STDY13216921_BHF_F_Hea13242528#TGGGCCTAGATGGACA-1 ...
  BHF_F_Hea11933675_BHF_F_Hea11596628#GTACTAATCGTTTCCA-1
  7089STDY13216923_BHF_F_Hea13242530#GCTGGTTCAAATTCGT-1
colData names(63): BlacklistRatio Gex_MitoRatio ... ReadsInPeaks FRIP

In [8]:
length(colnames(GeneScoreMatrix))
head(colnames(GeneScoreMatrix))

In [9]:
# add rawnames of GeneScoreMatrix
rownames(GeneScoreMatrix) = rowData(GeneScoreMatrix)$name
GeneScoreMatrix

class: SummarizedExperiment 
dim: 24919 167022 
metadata(0):
assays(1): GeneScoreMatrix
rownames(24919): OR4F5 LOC729737 ... F8A2 TMLHE
rowData names(6): seqnames start ... name idx
colnames(167022): 7089STDY13216921_BHF_F_Hea13242528#GCAATAGAGTTATGTG-1
  7089STDY13216921_BHF_F_Hea13242528#TGGGCCTAGATGGACA-1 ...
  BHF_F_Hea11933675_BHF_F_Hea11596628#GTACTAATCGTTTCCA-1
  7089STDY13216923_BHF_F_Hea13242530#GCTGGTTCAAATTCGT-1
colData names(63): BlacklistRatio Gex_MitoRatio ... ReadsInPeaks FRIP

## Save as matrix

In [10]:
# save
writeMM(obj = assay(GeneScoreMatrix), file="/nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/GeneScoreMatrix/matrix.mtx")

NULL

In [11]:
head(rownames(GeneScoreMatrix))
head(colnames(GeneScoreMatrix))

In [12]:
write.table(rownames(GeneScoreMatrix), file="/nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/GeneScoreMatrix/features.tsv",
           col.names = FALSE, row.names = FALSE, quote = FALSE, sep='\t')

# row.names = TRUE to put the features in the 2nd column (for scanpy.read_10x_mtx later we use)
write.table(colnames(GeneScoreMatrix), file="/nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/GeneScoreMatrix/barcodes.tsv",
           col.names = FALSE, row.names = TRUE, quote = FALSE, sep='\t')

In [13]:
%%bash
gzip -v /nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/GeneScoreMatrix/matrix.mtx
gzip -v /nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/GeneScoreMatrix/features.tsv
gzip -v /nfs/team205/heart/anndata_objects/Foetal/multiome_ATAC/ArchR/project_output/GeneScoreMatrix/barcodes.tsv


ERROR: Error in parse(text = x, srcfile = src): <text>:1:1: unexpected input
1: %bash
    ^
