# Merge RNA count matrices by region

In [1]:
suppressPackageStartupMessages({
    library(Seurat)
    library(repr)
    library(patchwork)
    library(ggplot2)
    library(Signac)
    library(tidyverse)
    library(GenomicRanges)
    library(edgeR)
    library(SingleCellExperiment)
    library(Matrix)
    library(scran)
    library(scater)
    library(ggrepel)
    library(fs)
})
# Session-wide options, set together in a single call.
# NOTE(review): future.globals.maxSize = Inf presumably removes the size limit
# the future package places on exported globals, and ggrepel.max.overlaps = Inf
# presumably stops ggrepel from dropping overlapping labels -- confirm against
# the respective package documentation.
options(
    future.globals.maxSize = Inf,
    Seurat.object.assay.version = "v5",
    ggrepel.max.overlaps = Inf
)

In [2]:
# Switch to the SNARE data directory. The relative paths used further down
# ("cell_gene_matrix/", "regional_combined_datasets/rna_counts/") are resolved
# against this working directory.
setwd("/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/")

In [3]:
# Read every RDS file under cell_gene_matrix/ into a named list, keyed by the
# file name without its extension (one entry per subclass file).
subclass_files <- as.character(dir_ls("cell_gene_matrix/", glob = "*rds"))
if (length(subclass_files) == 0) {
    # Fail early: an empty list would otherwise propagate silently downstream.
    stop("No RDS files found in cell_gene_matrix/", call. = FALSE)
}

# Strip the extension literally. Splitting on the regex ".rds" treats "." as a
# wildcard and would truncate any name that itself contains "rds".
subclasses <- as.character(path_ext_remove(path_file(subclass_files)))

# The directory is listed once, so names and contents cannot get out of sync.
subclass_matrices <- lapply(subclass_files, readRDS)
names(subclass_matrices) <- subclasses

In [4]:
# Column-bind a list of row-named matrices over the union of their row names.
#
# Every matrix is expanded to the sorted union of all row names; rows that a
# matrix does not have are filled with zeros. The expanded matrices are then
# joined with cbind() in list order.
#
# Args:
#   matrix_list: list of matrices carrying row names (sparse dgCMatrix inputs
#     are the intended use, going by the function name).
#
# Returns:
#   A sparse matrix with one row per distinct row name (sorted) and the columns
#   of all inputs side by side; NULL when matrix_list is empty.
#
# Raises:
#   An error if a non-empty matrix has no row names, or if a matrix has
#   duplicated row names (its rows could not be placed unambiguously).
column_merge_dgC_matrices <- function(matrix_list) {
    if (length(matrix_list) == 0) {
        return(NULL)
    }

    for (mat in matrix_list) {
        if (nrow(mat) > 0 && is.null(rownames(mat))) {
            stop("every matrix in matrix_list must have row names", call. = FALSE)
        }
        if (anyDuplicated(rownames(mat)) > 0) {
            stop("row names must be unique within each matrix", call. = FALSE)
        }
    }

    all_rows <- sort(unique(unlist(lapply(matrix_list, rownames), use.names = FALSE)))

    aligned_matrices <- lapply(matrix_list, function(mat) {
        # Sparse selector with a single 1 per input row, placed at that row's
        # position in all_rows. Multiplying by it scatters the rows of mat into
        # the union layout without sub-assigning into a sparse matrix, which is
        # very slow for large inputs.
        selector <- sparseMatrix(
            i = match(rownames(mat), all_rows),
            j = seq_len(nrow(mat)),
            x = rep(1, nrow(mat)),
            dims = c(length(all_rows), nrow(mat))
        )
        # Coerce so that dense inputs still yield a sparse result.
        mat_extended <- as(selector %*% mat, "CsparseMatrix")
        dimnames(mat_extended) <- list(all_rows, colnames(mat))
        mat_extended
    })

    do.call(cbind, aligned_matrices)
}

In [5]:
# Load the RNA metadata table (tab-separated text despite the .xls extension,
# given sep = "\t"); the first column supplies the row names.
meta_table <- read.table(
    file = "cell_gene_matrix/20230313_RNA_metadata.xls",
    header = TRUE,
    sep = "\t",
    row.names = 1
)

In [28]:
# Build one combined count matrix per region: for every region, take from each
# subclass matrix the columns whose names appear as row names of the metadata
# rows for that region, put all subclass matrices into a common row order, and
# column-bind them.
region_names <- unique(meta_table$Region)
region_combined_matrices <- vector(mode = "list", length = length(region_names))
names(region_combined_matrices) <- region_names

# Row order of the "L2_3_IT" matrix is the reference for every subclass.
# Column subsetting never changes row names, so this is computed once.
if (!"L2_3_IT" %in% names(subclass_matrices)) {
    stop("subclass_matrices has no 'L2_3_IT' entry to take the row order from", call. = FALSE)
}
all_rows <- rownames(subclass_matrices[["L2_3_IT"]])
if (is.null(all_rows)) {
    stop("the 'L2_3_IT' matrix has no row names", call. = FALSE)
}

for (region in region_names) {
    print(region)
    flush.console()
    # which() ignores NA entries in the Region column.
    region_cells <- rownames(meta_table)[which(meta_table$Region == region)]
    region_matrices <- lapply(subclass_matrices, function(subclass_matrix) {
        keep <- colnames(subclass_matrix) %in% region_cells
        # drop = FALSE keeps a matrix even when exactly one cell matches;
        # without it the result collapses to a vector and the merge fails.
        subclass_matrix[all_rows, keep, drop = FALSE]
    })
    region_combined_matrices[[region]] <- do.call(cbind, region_matrices)
}

[1] "A1C"
[1] "A9"
[1] "A24"
[1] "AnG"
[1] "FI"
[1] "M1C"
[1] "MTG"
[1] "S1C"
[1] "V1C"


In [29]:
# Write each regional matrix to regional_combined_datasets/rna_counts/<region>.rds.
output_dir <- "regional_combined_datasets/rna_counts"
# saveRDS() does not create missing directories, so make sure the target exists
# (no-op, without a warning, when it is already there).
dir.create(output_dir, recursive = TRUE, showWarnings = FALSE)

for (region in names(region_combined_matrices)) {
    saveRDS(
        region_combined_matrices[[region]],
        file.path(output_dir, sprintf("%s.rds", region))
    )
}