# Prep RNA and ATAC data (Matrix Market and TSV exports, to be assembled into h5ad format) for several cell types

In [1]:
suppressPackageStartupMessages({
    library(Seurat)
    library(repr)
    library(patchwork)
    library(ggplot2)
    library(Signac)
    library(tidyverse)
    library(GenomicRanges)
    library(edgeR)
    library(SingleCellExperiment)
    library(Matrix)
    library(scran)
    library(scater)
    library(ggrepel)
    library(fs)
    library(tidyverse)
    library(randomForest)
    library(reticulate)
    library(pheatmap)
    library(gridExtra)
    library(RColorBrewer)
    library(MAST)
    library(data.table)
    library(ComplexHeatmap)
})
# Session-wide options, set together in a single call.
options(
    future.globals.maxSize = Inf,
    Seurat.object.assay.version = "v5",
    ggrepel.max.overlaps = Inf
)

## Oligo

In [2]:
# Export the Oligo ATAC matrix and its companion tables to ../scenicplus/Oligo/.
# Relative paths below are resolved against the cell_peak_matrix directory.
root_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_peak_matrix/"
setwd(root_dir)

In [3]:
Oligo_atac <- readRDS("Oligo.rds")

In [4]:
meta_data <- read.table(
    "../cell_gene_matrix/20230313_RNA_metadata.xls", sep = "\t", header = TRUE)

In [5]:
# Metadata rows are selected by the column names (barcodes) of the ATAC matrix.
# A barcode with no matching row name comes back as an all-NA row instead of
# an error, so flag that case rather than writing NA rows silently.
if (!all(colnames(Oligo_atac) %in% rownames(meta_data))) {
    warning(
        "Oligo: some ATAC barcodes have no row in meta_data; ",
        "their metadata is written as NA", call. = FALSE)
}
write.table(
    meta_data[colnames(Oligo_atac), ], "../scenicplus/Oligo/Oligo_meta_data.tsv",
    col.names = TRUE, row.names = TRUE, quote = FALSE, sep = "\t")

In [6]:
# Matrix Market export of the matrix itself.
writeMM(Oligo_atac, "../scenicplus/Oligo/Oligo_atac_matrix.mtx")

NULL

In [7]:
# Column names, one per line; this file is read back when filtering the RNA matrix.
write.table(
    colnames(Oligo_atac), "../scenicplus/Oligo/Oligo_cell_barcodes.tsv",
    col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

In [8]:
# Row names, one per line.
write.table(
    rownames(Oligo_atac), "../scenicplus/Oligo/Oligo_atac_regions.tsv",
    col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

In [9]:
# Export the Oligo RNA matrix, restricted to the barcodes saved from the ATAC
# matrix. Relative paths resolve against the cell_gene_matrix directory.
root_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_gene_matrix/"
setwd(dir = root_dir)

In [10]:
Oligo_rna <- readRDS(file = "Oligo.rds")

In [11]:
# One barcode per line, no header; read.table names the single column V1.
Oligo_barcodes <- read.table(
    file = "../scenicplus/Oligo/Oligo_cell_barcodes.tsv",
    sep = "\t",
    header = FALSE
)

In [12]:
# Keep only the listed barcodes, in the listed order.
Oligo_rna_filtered <- Oligo_rna[, Oligo_barcodes[["V1"]]]

In [13]:
writeMM(
    obj = Oligo_rna_filtered,
    file = "../scenicplus/Oligo/Oligo_rna_matrix.mtx"
)

NULL

In [14]:
# Row names of the filtered matrix, one per line.
write.table(
    x = rownames(Oligo_rna_filtered),
    file = "../scenicplus/Oligo/Oligo_gene_names.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
)

## Micro

In [15]:
# Export the Micro ATAC matrix and its companion tables to ../scenicplus/Micro/.
# Relative paths below are resolved against the cell_peak_matrix directory.
root_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_peak_matrix/"
setwd(root_dir)

In [16]:
Micro_atac <- readRDS("Micro.rds")

In [17]:
meta_data <- read.table(
    "../cell_gene_matrix/20230313_RNA_metadata.xls", sep = "\t", header = TRUE)

In [18]:
# Metadata rows are selected by the column names (barcodes) of the ATAC matrix.
# A barcode with no matching row name comes back as an all-NA row instead of
# an error, so flag that case rather than writing NA rows silently.
if (!all(colnames(Micro_atac) %in% rownames(meta_data))) {
    warning(
        "Micro: some ATAC barcodes have no row in meta_data; ",
        "their metadata is written as NA", call. = FALSE)
}
write.table(
    meta_data[colnames(Micro_atac), ], "../scenicplus/Micro/Micro_meta_data.tsv",
    col.names = TRUE, row.names = TRUE, quote = FALSE, sep = "\t")

In [19]:
# Matrix Market export of the matrix itself.
writeMM(Micro_atac, "../scenicplus/Micro/Micro_atac_matrix.mtx")

NULL

In [20]:
# Column names, one per line; this file is read back when filtering the RNA matrix.
write.table(
    colnames(Micro_atac), "../scenicplus/Micro/Micro_cell_barcodes.tsv",
    col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

In [21]:
# Row names, one per line.
write.table(
    rownames(Micro_atac), "../scenicplus/Micro/Micro_atac_regions.tsv",
    col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

In [22]:
# Export the Micro RNA matrix, restricted to the barcodes saved from the ATAC
# matrix. Relative paths resolve against the cell_gene_matrix directory.
root_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_gene_matrix/"
setwd(dir = root_dir)

In [23]:
Micro_rna <- readRDS(file = "Micro.rds")

In [24]:
# One barcode per line, no header; read.table names the single column V1.
Micro_barcodes <- read.table(
    file = "../scenicplus/Micro/Micro_cell_barcodes.tsv",
    sep = "\t",
    header = FALSE
)

In [25]:
# Keep only the listed barcodes, in the listed order.
Micro_rna_filtered <- Micro_rna[, Micro_barcodes[["V1"]]]

In [26]:
writeMM(
    obj = Micro_rna_filtered,
    file = "../scenicplus/Micro/Micro_rna_matrix.mtx"
)

NULL

In [27]:
# Row names of the filtered matrix, one per line.
write.table(
    x = rownames(Micro_rna_filtered),
    file = "../scenicplus/Micro/Micro_gene_names.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
)

## L2/3 IT

In [2]:
# Export the L2_3_IT ATAC matrix and its companion tables to ../scenicplus/L2_3_IT/.
# Relative paths below are resolved against the cell_peak_matrix directory.
root_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_peak_matrix/"
setwd(root_dir)

In [3]:
L2_3_IT_atac <- readRDS("L2_3_IT.rds")

In [4]:
meta_data <- read.table(
    "../cell_gene_matrix/20230313_RNA_metadata.xls", sep = "\t", header = TRUE)

In [5]:
# Metadata rows are selected by the column names (barcodes) of the ATAC matrix.
# A barcode with no matching row name comes back as an all-NA row instead of
# an error, so flag that case rather than writing NA rows silently.
if (!all(colnames(L2_3_IT_atac) %in% rownames(meta_data))) {
    warning(
        "L2_3_IT: some ATAC barcodes have no row in meta_data; ",
        "their metadata is written as NA", call. = FALSE)
}
write.table(
    meta_data[colnames(L2_3_IT_atac), ], "../scenicplus/L2_3_IT/L2_3_IT_meta_data.tsv",
    col.names = TRUE, row.names = TRUE, quote = FALSE, sep = "\t")

In [6]:
# Matrix Market export of the matrix itself.
writeMM(L2_3_IT_atac, "../scenicplus/L2_3_IT/L2_3_IT_atac_matrix.mtx")

NULL

In [7]:
# Column names, one per line; this file is read back when filtering the RNA matrix.
write.table(
    colnames(L2_3_IT_atac), "../scenicplus/L2_3_IT/L2_3_IT_cell_barcodes.tsv",
    col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

In [8]:
# Row names, one per line.
write.table(
    rownames(L2_3_IT_atac), "../scenicplus/L2_3_IT/L2_3_IT_atac_regions.tsv",
    col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

In [9]:
# Export the L2_3_IT RNA matrix, restricted to the barcodes saved from the ATAC
# matrix. Relative paths resolve against the cell_gene_matrix directory.
root_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_gene_matrix/"
setwd(dir = root_dir)

In [10]:
L2_3_IT_rna <- readRDS(file = "L2_3_IT.rds")

In [11]:
# One barcode per line, no header; read.table names the single column V1.
L2_3_IT_barcodes <- read.table(
    file = "../scenicplus/L2_3_IT/L2_3_IT_cell_barcodes.tsv",
    sep = "\t",
    header = FALSE
)

In [12]:
# Keep only the listed barcodes, in the listed order.
L2_3_IT_rna_filtered <- L2_3_IT_rna[, L2_3_IT_barcodes[["V1"]]]

In [13]:
writeMM(
    obj = L2_3_IT_rna_filtered,
    file = "../scenicplus/L2_3_IT/L2_3_IT_rna_matrix.mtx"
)

NULL

In [14]:
# Row names of the filtered matrix, one per line.
write.table(
    x = rownames(L2_3_IT_rna_filtered),
    file = "../scenicplus/L2_3_IT/L2_3_IT_gene_names.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
)

## L5 IT

In [15]:
# Export the L5_IT ATAC matrix and its companion tables to ../scenicplus/L5_IT/.
# Relative paths below are resolved against the cell_peak_matrix directory.
root_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_peak_matrix/"
setwd(root_dir)

In [16]:
L5_IT_atac <- readRDS("L5_IT.rds")

In [17]:
meta_data <- read.table(
    "../cell_gene_matrix/20230313_RNA_metadata.xls", sep = "\t", header = TRUE)

In [18]:
# Metadata rows are selected by the column names (barcodes) of the ATAC matrix.
# A barcode with no matching row name comes back as an all-NA row instead of
# an error, so flag that case rather than writing NA rows silently.
if (!all(colnames(L5_IT_atac) %in% rownames(meta_data))) {
    warning(
        "L5_IT: some ATAC barcodes have no row in meta_data; ",
        "their metadata is written as NA", call. = FALSE)
}
write.table(
    meta_data[colnames(L5_IT_atac), ], "../scenicplus/L5_IT/L5_IT_meta_data.tsv",
    col.names = TRUE, row.names = TRUE, quote = FALSE, sep = "\t")

In [19]:
# Matrix Market export of the matrix itself.
writeMM(L5_IT_atac, "../scenicplus/L5_IT/L5_IT_atac_matrix.mtx")

NULL

In [20]:
# Column names, one per line; this file is read back when filtering the RNA matrix.
write.table(
    colnames(L5_IT_atac), "../scenicplus/L5_IT/L5_IT_cell_barcodes.tsv",
    col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

In [21]:
# Row names, one per line.
write.table(
    rownames(L5_IT_atac), "../scenicplus/L5_IT/L5_IT_atac_regions.tsv",
    col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

In [22]:
# Export the L5_IT RNA matrix, restricted to the barcodes saved from the ATAC
# matrix. Relative paths resolve against the cell_gene_matrix directory.
root_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_gene_matrix/"
setwd(dir = root_dir)

In [23]:
L5_IT_rna <- readRDS(file = "L5_IT.rds")

In [24]:
# One barcode per line, no header; read.table names the single column V1.
L5_IT_barcodes <- read.table(
    file = "../scenicplus/L5_IT/L5_IT_cell_barcodes.tsv",
    sep = "\t",
    header = FALSE
)

In [25]:
# Keep only the listed barcodes, in the listed order.
L5_IT_rna_filtered <- L5_IT_rna[, L5_IT_barcodes[["V1"]]]

In [26]:
writeMM(
    obj = L5_IT_rna_filtered,
    file = "../scenicplus/L5_IT/L5_IT_rna_matrix.mtx"
)

NULL

In [27]:
# Row names of the filtered matrix, one per line.
write.table(
    x = rownames(L5_IT_rna_filtered),
    file = "../scenicplus/L5_IT/L5_IT_gene_names.tsv",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE,
    col.names = FALSE
)

## Other cell types

In [3]:
# Start in the cell_peak_matrix directory and load the shared metadata table
# used by the export loop for the remaining cell types.
root_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_peak_matrix/"
setwd(dir = root_dir)

In [4]:
meta_data <- read.table(
    file = "../cell_gene_matrix/20230313_RNA_metadata.xls",
    header = TRUE,
    sep = "\t"
)

In [7]:
# Export ATAC and RNA data for every cell type that has an .rds file in the
# peak-matrix directory, except those named in `skipped_celltypes`.
# Requires `meta_data` to be loaded already. For each cell type the loop writes,
# under ../scenicplus/other_celltypes/:
#   <celltype>_meta_data.tsv, <celltype>_atac_matrix.mtx,
#   <celltype>_cell_barcodes.tsv, <celltype>_atac_regions.tsv,
#   <celltype>_rna_matrix.mtx, <celltype>_gene_names.tsv
peak_matrix_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_peak_matrix/"
gene_matrix_dir <- "/tscc/projects/ps-epigen/users/biy022/biccn/data/SNAREdata/cell_gene_matrix/"
skipped_celltypes <- c("Oligo", "Micro", "L2_3_IT", "L4_IT", "L5_IT", "L5_ET", "SST_CHODL")

# List the peak-matrix directory explicitly so the loop does not depend on the
# working directory at entry; "*.rds" matches the .rds extension only.
for (file in dir_ls(peak_matrix_dir, glob = "*.rds")) {
    root_dir <- peak_matrix_dir
    setwd(root_dir)
    # Cell type is the file name without directory or extension, so names that
    # contain a dot and paths with a directory prefix are handled correctly.
    celltype <- tools::file_path_sans_ext(basename(file))

    if (celltype %in% skipped_celltypes) {
        next
    }
    print(celltype)

    other_atac <- readRDS(file)
    # A barcode with no matching row name in meta_data comes back as an all-NA
    # row instead of an error; flag it rather than writing NA rows silently.
    if (!all(colnames(other_atac) %in% rownames(meta_data))) {
        warning(
            sprintf(
                "%s: some ATAC barcodes have no row in meta_data; their metadata is written as NA",
                celltype),
            call. = FALSE)
    }
    write.table(
        meta_data[colnames(other_atac), ], sprintf("../scenicplus/other_celltypes/%s_meta_data.tsv", celltype),
        col.names = TRUE, row.names = TRUE, quote = FALSE, sep = "\t")
    writeMM(other_atac, sprintf("../scenicplus/other_celltypes/%s_atac_matrix.mtx", celltype))
    write.table(
        colnames(other_atac), sprintf("../scenicplus/other_celltypes/%s_cell_barcodes.tsv", celltype),
        col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)
    write.table(
        rownames(other_atac), sprintf("../scenicplus/other_celltypes/%s_atac_regions.tsv", celltype),
        col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)

    # RNA side: same cell type name, read from the gene-matrix directory and
    # restricted to the barcodes just written for the ATAC matrix.
    root_dir <- gene_matrix_dir
    setwd(root_dir)
    other_rna <- readRDS(sprintf("%s.rds", celltype))
    other_barcodes <- read.table(
        sprintf("../scenicplus/other_celltypes/%s_cell_barcodes.tsv", celltype),
        header = FALSE, sep = "\t")
    other_rna_filtered <- other_rna[, other_barcodes$V1]
    writeMM(other_rna_filtered, sprintf("../scenicplus/other_celltypes/%s_rna_matrix.mtx", celltype))
    write.table(
        rownames(other_rna_filtered), sprintf("../scenicplus/other_celltypes/%s_gene_names.tsv", celltype),
        col.names = FALSE, quote = FALSE, sep = "\t", row.names = FALSE)
}

[1] "Astro"
[1] "Chandelier"
[1] "Endo"
[1] "L5_6_NP"
[1] "L6B"
[1] "L6_CT"
[1] "L6_IT"
[1] "L6_IT_Car3"
[1] "LAMP5"
[1] "LAMP5_LHX6"
[1] "OPC"
[1] "PAX6"
[1] "PVALB"
[1] "SNCG"
[1] "SST"
[1] "VIP"
[1] "VLMC"
