In [3]:
suppressPackageStartupMessages(library(tidyverse))
suppressPackageStartupMessages(library(GenomicRanges))
suppressPackageStartupMessages(library(plyranges))
suppressPackageStartupMessages(library(Signac))

### PBMC

In [4]:
piscem_data_dir <- "/fs/nexus-projects/scATAC-seq/piscem/map_output/10k_pbmc_ATACv2_nextgem_Chromium_Controller_fastqs/k25_m_17/bin-size=1000_thr=0.7_orp=false"
chromap_data_dir <- "/fs/cbcb-lab/rob/students/noor/Atacseq/chromap_output/map_output/10k_pbmc_ATACv2_nextgem_Chromium_Controller_fastqs"
soft_mask_dir <- "/fs/nexus-projects/scATAC-seq/reference"
cellranger_data_dir <- "/fs/cbcb-lab/rob/students/noor/Atacseq/cell_ranger_output/10k_pbmc_ATACv2_nextgem_Chromium_Controller_fastqs"

In [5]:
chromapPeaks <- read.table(file.path(chromap_data_dir, "macs2_peaks.narrowPeak"))
chromapPeaks <- chromapPeaks[,c(1:3)]
colnames(chromapPeaks) <- c("chr","start","end")
chromapPeaksGr <- makeGRangesFromDataFrame(chromapPeaks)
chromapPeaksGr <- keepStandardChromosomes(chromapPeaksGr, pruning.mode="coarse")

piscemPeaks <- read.table(file.path(piscem_data_dir, "macs2_peaks.narrowPeak"))
piscemPeaks <- piscemPeaks[,c(1:3)]
colnames(piscemPeaks) <- c("chr","start","end")
piscemPeaksGr <- makeGRangesFromDataFrame(piscemPeaks)
piscemPeaksGr <- keepStandardChromosomes(piscemPeaksGr, pruning.mode="coarse")

softMaskPeaks <- read.table(file.path(soft_mask_dir, "softmask.bed"))
softMaskPeaks <- softMaskPeaks[,c(1:3)]
colnames(softMaskPeaks) <- c("chr","start","end")
softMaskPeaksGr <- makeGRangesFromDataFrame(softMaskPeaks)
softMaskPeaks <- keepStandardChromosomes(softMaskPeaksGr, pruning.mode="coarse")

cellRangerMacs2Peaks <- read.table(file.path(cellranger_data_dir, "macs2_peaks.narrowPeak"))
cellRangerMacs2Peaks <- cellRangerMacs2Peaks[,c(1:3)]
colnames(cellRangerMacs2Peaks) <- c("chr","start","end")
cellRangerMacs2PeaksGr <- makeGRangesFromDataFrame(cellRangerMacs2Peaks)
cellRangerMacs2PeaksGr <- keepStandardChromosomes(cellRangerMacs2PeaksGr, pruning.mode="coarse")

#### AFA vs Chromap

In [6]:
sum(width(intersect_ranges(chromapPeaksGr, piscemPeaksGr)))/sum(width(chromapPeaksGr))
sum(width(intersect_ranges(chromapPeaksGr, piscemPeaksGr)))/sum(width(piscemPeaksGr))

#### CellRanger vs Chromap

In [7]:
sum(width(intersect_ranges(chromapPeaksGr, cellRangerMacs2PeaksGr)))/sum(width(chromapPeaksGr))
sum(width(intersect_ranges(chromapPeaksGr, cellRangerMacs2PeaksGr)))/sum(width(cellRangerMacs2PeaksGr))

#### CellRanger vs AFA

In [8]:
sum(width(intersect_ranges(piscemPeaksGr, cellRangerMacs2PeaksGr)))/sum(width(piscemPeaksGr))
sum(width(intersect_ranges(piscemPeaksGr, cellRangerMacs2PeaksGr)))/sum(width(cellRangerMacs2PeaksGr))

#### Chromap vs Piscem unique

In [9]:
chromUn <- setdiff_ranges(chromapPeaksGr, piscemPeaksGr)
sum(width(chromUn))/1e6
piscUn <- setdiff_ranges(piscemPeaksGr, chromapPeaksGr)
sum(width(piscUn))/1e6

#### Chromap unique intersect softmask

In [10]:
soft_chrom <- intersect_ranges(softMaskPeaksGr, chromUn)
sum(width(soft_chrom))
sum(width(soft_chrom))/sum(width(chromUn))

#### Piscem unique intersect softmask

In [11]:
soft_piscem <- intersect_ranges(softMaskPeaksGr, piscUn)
sum(width(soft_piscem))
sum(width(soft_piscem))/sum(width(piscUn))

#### Chromap unique intersect blacklist

In [12]:
black_chrom <- intersect_ranges(blacklist_hg38_unified, chromUn)
sum(width(black_chrom))
sum(width(black_chrom))/sum(width(chromUn))

#### Piscem unique intersect blacklist

In [13]:
black_pisc <- intersect_ranges(blacklist_hg38_unified, piscUn)
sum(width(black_pisc))
sum(width(black_pisc))/sum(width(piscUn))

### 8K Mouse Cortex

In [14]:
piscem_data_dir <- "/fs/nexus-projects/scATAC-seq/piscem/map_output/8k_mouse_cortex_ATACv2_nextgem_Chromium_Controller_fastqs/k25_m_17/bin-size=1000_thr=0.7_orp=false"
chromap_data_dir <- "/fs/cbcb-lab/rob/students/noor/Atacseq/chromap_output/map_output/8k_mouse_cortex_ATACv2_nextgem_Chromium_Controller_fastqs"
soft_mask_dir <- "/fs/nexus-projects/scATAC-seq/reference"
cellranger_data_dir <- "/fs/cbcb-lab/rob/students/noor/Atacseq/cell_ranger_output/8k_mouse_cortex_ATACv2_nextgem_Chromium_Controller_fastqs"

In [15]:
chromapPeaks <- read.table(file.path(chromap_data_dir, "macs2_peaks.narrowPeak"))
chromapPeaks <- chromapPeaks[,c(1:3)]
colnames(chromapPeaks) <- c("chr","start","end")
chromapPeaksGr <- makeGRangesFromDataFrame(chromapPeaks)
chromapPeaksGr <- keepStandardChromosomes(chromapPeaksGr, pruning.mode="coarse")

piscemPeaks <- read.table(file.path(piscem_data_dir, "macs2_peaks.narrowPeak"))
piscemPeaks <- piscemPeaks[,c(1:3)]
colnames(piscemPeaks) <- c("chr","start","end")
piscemPeaksGr <- makeGRangesFromDataFrame(piscemPeaks)
piscemPeaksGr <- keepStandardChromosomes(piscemPeaksGr, pruning.mode="coarse")

softMaskPeaks <- read.table(file.path(soft_mask_dir, "softmask_mm10.bed"))
softMaskPeaks <- softMaskPeaks[,c(1:3)]
colnames(softMaskPeaks) <- c("chr","start","end")
softMaskPeaksGr <- makeGRangesFromDataFrame(softMaskPeaks)
softMaskPeaks <- keepStandardChromosomes(softMaskPeaksGr, pruning.mode="coarse")

cellRangerMacs2Peaks <- read.table(file.path(cellranger_data_dir, "macs2_peaks.narrowPeak"))
cellRangerMacs2Peaks <- cellRangerMacs2Peaks[,c(1:3)]
colnames(cellRangerMacs2Peaks) <- c("chr","start","end")
cellRangerMacs2PeaksGr <- makeGRangesFromDataFrame(cellRangerMacs2Peaks)
cellRangerMacs2PeaksGr <- keepStandardChromosomes(cellRangerMacs2PeaksGr, pruning.mode="coarse")

In [16]:
sum(width(intersect_ranges(chromapPeaksGr, piscemPeaksGr)))/sum(width(chromapPeaksGr))
sum(width(intersect_ranges(chromapPeaksGr, piscemPeaksGr)))/sum(width(piscemPeaksGr))

In [17]:
sum(width(intersect_ranges(chromapPeaksGr, cellRangerMacs2PeaksGr)))/sum(width(chromapPeaksGr))
sum(width(intersect_ranges(chromapPeaksGr, cellRangerMacs2PeaksGr)))/sum(width(cellRangerMacs2PeaksGr))

In [18]:
sum(width(intersect_ranges(piscemPeaksGr, cellRangerMacs2PeaksGr)))/sum(width(piscemPeaksGr))
sum(width(intersect_ranges(piscemPeaksGr, cellRangerMacs2PeaksGr)))/sum(width(cellRangerMacs2PeaksGr))

In [17]:
chromUn <- setdiff_ranges(chromapPeaksGr, piscemPeaksGr)
sum(width(chromUn))/1e6
piscUn <- setdiff_ranges(piscemPeaksGr, chromapPeaksGr)
sum(width(piscUn))/1e6

In [23]:
soft_chrom <- intersect_ranges(softMaskPeaksGr, chromUn)
sum(width(soft_chrom))
sum(width(soft_chrom))/sum(width(chromUn))

“Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': GL456210.1, GL456211.1, GL456212.1, GL456213.1, GL456216.1, GL456219.1, GL456221.1, GL456233.1, GL456239.1, GL456350.1, GL456354.1, GL456359.1, GL456360.1, GL456366.1, GL456367.1, GL456368.1, GL456370.1, GL456372.1, GL456378.1, GL456379.1, GL456381.1, GL456382.1, GL456383.1, GL456385.1, GL456387.1, GL456389.1, GL456390.1, GL456392.1, GL456393.1, GL456394.1, GL456396.1, JH584292.1, JH584293.1, JH584294.1, JH584295.1, JH584296.1, JH584297.1, JH584298.1, JH584299.1, JH584300.1, JH584301.1, JH584302.1, JH584303.1, JH584304.1
  - in 'y': chrM
  Make sure to always combine/compare objects based on the same reference


In [24]:
soft_piscem <- intersect_ranges(softMaskPeaksGr, piscUn)
sum(width(soft_piscem))
sum(width(soft_piscem))/sum(width(piscUn))

“Each of the 2 combined objects has sequence levels not in the other:
  - in 'x': GL456210.1, GL456211.1, GL456212.1, GL456213.1, GL456216.1, GL456219.1, GL456221.1, GL456233.1, GL456239.1, GL456350.1, GL456354.1, GL456359.1, GL456360.1, GL456366.1, GL456367.1, GL456368.1, GL456370.1, GL456372.1, GL456378.1, GL456379.1, GL456381.1, GL456382.1, GL456383.1, GL456385.1, GL456387.1, GL456389.1, GL456390.1, GL456392.1, GL456393.1, GL456394.1, GL456396.1, JH584292.1, JH584293.1, JH584294.1, JH584295.1, JH584296.1, JH584297.1, JH584298.1, JH584299.1, JH584300.1, JH584301.1, JH584302.1, JH584303.1, JH584304.1
  - in 'y': chrM
  Make sure to always combine/compare objects based on the same reference


In [25]:
black_chrom <- intersect_ranges(blacklist_mm10, chromUn)
sum(width(black_chrom))
sum(width(black_chrom))/sum(width(chromUn))

In [27]:
black_pisc <- intersect_ranges(blacklist_mm10, piscUn)
sum(width(black_pisc))
sum(width(black_pisc))/sum(width(piscUn))

### Human 3K dataset

In [19]:
piscem_data_dir <- "/fs/nexus-projects/scATAC-seq/piscem/map_output/human_brain_3k_fastqs/k25_m_17/bin-size=1000_thr=0.7_orp=false"
chromap_data_dir <- "/fs/cbcb-lab/rob/students/noor/Atacseq/chromap_output/map_output/human_brain_3k_fastqs"
soft_mask_dir <- "/fs/nexus-projects/scATAC-seq/reference"

In [20]:
chromapPeaks <- read.table(file.path(chromap_data_dir, "macs2_peaks.narrowPeak"))
chromapPeaks <- chromapPeaks[,c(1:3)]
colnames(chromapPeaks) <- c("chr","start","end")
chromapPeaksGr <- makeGRangesFromDataFrame(chromapPeaks)
chromapPeaksGr <- keepStandardChromosomes(chromapPeaksGr, pruning.mode="coarse")

piscemPeaks <- read.table(file.path(piscem_data_dir, "macs2_peaks.narrowPeak"))
piscemPeaks <- piscemPeaks[,c(1:3)]
colnames(piscemPeaks) <- c("chr","start","end")
piscemPeaksGr <- makeGRangesFromDataFrame(piscemPeaks)
piscemPeaksGr <- keepStandardChromosomes(piscemPeaksGr, pruning.mode="coarse")

softMaskPeaks <- read.table(file.path(soft_mask_dir, "softmask.bed"))
softMaskPeaks <- softMaskPeaks[,c(1:3)]
colnames(softMaskPeaks) <- c("chr","start","end")
softMaskPeaksGr <- makeGRangesFromDataFrame(softMaskPeaks)
softMaskPeaks <- keepStandardChromosomes(softMaskPeaksGr, pruning.mode="coarse")

In [21]:
sum(width(intersect_ranges(chromapPeaksGr, piscemPeaksGr)))/sum(width(chromapPeaksGr))
sum(width(intersect_ranges(chromapPeaksGr, piscemPeaksGr)))/sum(width(piscemPeaksGr))

In [22]:
chromUn <- setdiff_ranges(chromapPeaksGr, piscemPeaksGr)
sum(width(chromUn))/1e6
piscUn <- setdiff_ranges(piscemPeaksGr, chromapPeaksGr)
sum(width(piscUn))/1e6


In [23]:
soft_chrom <- intersect_ranges(softMaskPeaksGr, chromUn)
sum(width(soft_chrom))
sum(width(soft_chrom))/sum(width(chromUn))

In [24]:
soft_piscem <- intersect_ranges(softMaskPeaksGr, piscUn)
sum(width(soft_piscem))
sum(width(soft_piscem))/sum(width(piscUn))