# Split fragment files into regional fragments

In [1]:
suppressPackageStartupMessages({
    library(Seurat)
    library(repr)
    library(patchwork)
    library(ggplot2)
    library(Signac)
    library(tidyverse)
    library(GenomicRanges)
    library(edgeR)
    library(SingleCellExperiment)
    library(Matrix)
    library(scran)
    library(scater)
    library(ggrepel)
    library(fs)
    library(tidyverse)
    library(randomForest)
    library(reticulate)
    library(pheatmap)
    library(gridExtra)
    library(RColorBrewer)
    library(MAST)
    library(data.table)
    library(ComplexHeatmap)
})
# Session-wide settings, applied once for the whole notebook.
options(
    # No upper bound (Inf) on future.globals.maxSize.
    future.globals.maxSize = Inf,
    # Use the "v5" assay version for Seurat objects created below.
    Seurat.object.assay.version = "v5",
    # No upper bound (Inf) on ggrepel.max.overlaps.
    ggrepel.max.overlaps = Inf
)

In [2]:
# Every relative path used below (cell_gene_matrix/, cell_peak_matrix/,
# subclass_fragments/, subclass_regional_fragments_peaks/) is resolved against
# this directory.
# NOTE(review): absolute, user-specific path -- the notebook only runs where
# this directory exists.
setwd("/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/")

In [4]:
# Load the tab-separated metadata table. The file has a header line and its
# first column becomes the row names; the loop below indexes these row names
# with cell barcodes.
meta_data <- read.table(
    file = "cell_gene_matrix/20230313_RNA_metadata.xls",
    sep = "\t",
    header = TRUE,
    row.names = 1
)

In [14]:
# Split each subclass's fragment file into one fragment file per "Region".
#
# For every entry in subclass_fragments/ matching the glob "*bed", the subclass
# name is the part of the file name before the first ".". The subclass's
# cell-by-peak matrix (<subclass>.rds) and barcode list
# (<subclass>-barcodes.tsv) are loaded from cell_peak_matrix/, wrapped in a
# Seurat object whose ChromatinAssay points at
# subclass_fragments/<subclass>.sorted.bed.gz, and SplitFragments() writes the
# per-Region output into subclass_regional_fragments_peaks/<subclass>/.
#
# Subclasses whose output directory already exists are skipped, so the cell can
# be re-run to resume. Because of that skip, a directory left behind by a
# failed or interrupted split is removed before the condition is re-raised;
# otherwise the partial output would be silently treated as complete.
for (file in dir_ls("subclass_fragments/", glob = "*bed")) {
    subclass <- str_split_1(path_file(file), "[.]")[1]
    print(subclass)
    flush.console()

    # Built once; used for the resume check, directory creation and output.
    out_dir <- sprintf("subclass_regional_fragments_peaks/%s", subclass)
    if (dir_exists(out_dir)) {
        next
    }

    barcode_file <- sprintf("cell_peak_matrix/%s-barcodes.tsv", subclass)
    rds_file <- sprintf("cell_peak_matrix/%s.rds", subclass)
    fragment_file <- sprintf("subclass_fragments/%s.sorted.bed.gz", subclass)

    atac_matrix <- readRDS(rds_file)
    # atac_matrix <- as(atac_matrix, "dgCMatrix")

    # stringsAsFactors = FALSE keeps the barcodes as character so they index
    # meta_data by row name (a factor would index by its integer codes).
    all_barcodes <- read.table(
        barcode_file,
        sep = "\t", header = FALSE, stringsAsFactors = FALSE
    )$V1

    # Indexing a data.frame by an unknown row name yields an all-NA row without
    # any message, so report missing barcodes explicitly.
    missing_barcodes <- setdiff(all_barcodes, rownames(meta_data))
    if (length(missing_barcodes) > 0) {
        warning(
            sprintf(
                "%s: %d of %d barcodes have no row in meta_data; their metadata will be NA",
                subclass, length(missing_barcodes), length(all_barcodes)
            ),
            call. = FALSE
        )
    }
    # drop = FALSE keeps a data.frame even if meta_data has a single column.
    subclass_meta <- meta_data[all_barcodes, , drop = FALSE]

    chrom_assay <- CreateChromatinAssay(
        counts = atac_matrix,
        fragments = fragment_file,
        sep = c("-", "-")
    )

    seurat_object <- CreateSeuratObject(
        counts = chrom_assay,
        assay = "ATAC",
        meta.data = subclass_meta
    )

    # The directory is created only after all inputs loaded successfully, so
    # the only step that can leave partial output behind is the split itself.
    dir_create(out_dir)
    tryCatch(
        SplitFragments(
            seurat_object,
            assay = "ATAC",
            group.by = "Region",
            outdir = out_dir
        ),
        error = function(cond) {
            # Remove partial output so the next run retries this subclass.
            dir_delete(out_dir)
            stop(cond)
        },
        interrupt = function(cond) {
            dir_delete(out_dir)
            stop(
                sprintf("Interrupted while splitting fragments for %s", subclass),
                call. = FALSE
            )
        }
    )
}

[1] "Astro"
[1] "Chandelier"
[1] "Endo"
[1] "L2_3_IT"
[1] "L4_IT"
[1] "L5_6_NP"
[1] "L5_ET"
[1] "L5_IT"
[1] "L6B"
[1] "L6_CT"
[1] "L6_IT"
[1] "L6_IT_Car3"
[1] "LAMP5"
[1] "LAMP5_LHX6"
[1] "Micro_PVM"


“Overlapping ranges supplied. Ranges should be non-overlapping.”
Computing hash

Processing file /tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/subclass_fragments/Micro_PVM.sorted.bed.gz






[1] "OPC"


“Overlapping ranges supplied. Ranges should be non-overlapping.”
Computing hash

Processing file /tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/subclass_fragments/OPC.sorted.bed.gz






[1] "Oligo"


“Overlapping ranges supplied. Ranges should be non-overlapping.”
Computing hash

Processing file /tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/subclass_fragments/Oligo.sorted.bed.gz






[1] "PAX6"


“Overlapping ranges supplied. Ranges should be non-overlapping.”
Computing hash

Processing file /tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/subclass_fragments/PAX6.sorted.bed.gz






[1] "PVALB"


“Overlapping ranges supplied. Ranges should be non-overlapping.”
Computing hash

Processing file /tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/subclass_fragments/PVALB.sorted.bed.gz






[1] "SNCG"


“Overlapping ranges supplied. Ranges should be non-overlapping.”
Computing hash

Processing file /tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/subclass_fragments/SNCG.sorted.bed.gz






[1] "SST"


“Overlapping ranges supplied. Ranges should be non-overlapping.”
Computing hash

Processing file /tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/subclass_fragments/SST.sorted.bed.gz






[1] "SST_CHODL"


“Overlapping ranges supplied. Ranges should be non-overlapping.”
Computing hash

Processing file /tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/subclass_fragments/SST_CHODL.sorted.bed.gz






[1] "VIP"


“Overlapping ranges supplied. Ranges should be non-overlapping.”
Computing hash

Processing file /tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/subclass_fragments/VIP.sorted.bed.gz






[1] "VLMC"


“Overlapping ranges supplied. Ranges should be non-overlapping.”
Computing hash

Processing file /tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/subclass_fragments/VLMC.sorted.bed.gz




