In [1]:
# load package and function for getting matrix of pseudobulk expression level (exp) and percentage of expression (pct)
require(Seurat)
# Fraction of the elements of `x` that are strictly greater than `threshold`.
# Returns NaN for zero-length input (0 / 0).
PercentAbove <- function(x, threshold) {
    above <- x[x > threshold]
    length(above) / length(x)
}
# Compute, for every fetched feature, the fraction of cells in each group
# whose value is strictly above zero.
#
# Args:
#   object:   Seurat object handed to FetchData().
#   vars:     features to fetch (forwarded to FetchData()).
#   cells:    cell names to use (forwarded to FetchData()).
#   group.by: name of the metadata column defining the groups; when NULL the
#             active identities (Idents(object)) are used instead.
#
# Returns:
#   A numeric matrix with one row per fetched feature and one column per
#   group. Columns follow the order in which groups first appear among
#   `cells`. Cells whose group is NA are ignored.
get_pct_matrix <- function(object, vars, cells, group.by){
    data.features <- FetchData(object = object, vars = vars, cells = cells)

    # Keep the grouping in its own vector (aligned by position with the rows
    # of data.features) instead of appending an `id` column, so a feature that
    # is itself called "id" cannot be overwritten.
    ids <- if (is.null(x = group.by)) {
        Idents(object = object)[cells, drop = TRUE]
    } else {
        object[[group.by, drop = TRUE]][cells, drop = TRUE]
    }
    ids <- as.character(x = ids)
    groups <- unique(x = ids[!is.na(x = ids)])

    # No usable group: return an empty features-by-groups matrix.
    if (length(x = groups) == 0) {
        return(matrix(
            data = numeric(0),
            nrow = ncol(x = data.features),
            ncol = 0,
            dimnames = list(colnames(x = data.features), NULL)
        ))
    }

    pct.list <- lapply(X = groups, FUN = function(ident) {
        # which() discards NA comparisons, so cells without a group never
        # leak into another group's subset.
        data.use <- data.features[which(x = ids == ident), , drop = FALSE]
        apply(X = data.use, MARGIN = 2, FUN = PercentAbove, threshold = 0)
    })

    # do.call(rbind, ...) always yields a matrix, even for a single group,
    # and binds all rows in one step instead of re-copying per group.
    res <- do.call(what = rbind, args = pct.list)
    rownames(res) <- groups
    t(res)
}

Loading required package: Seurat

Loading required package: SeuratObject

Loading required package: sp


Attaching package: ‘SeuratObject’


The following object is masked from ‘package:base’:

    intersect




In [4]:
# Lamprey (Pmar): per-"Refined family" pseudobulk expression (RNA assay) and
# fraction of expressing cells, both written to 0.bin/.
Pmar <- readRDS(
    file = "/mnt/data01/yuanzhen/01.Vertebrate_cell_evo/01.data/02.atlas_final/2.samap/4.final/Pmar.wb.iter_cluster_annotated.rds"
)
Pmar_exp <- AverageExpression(Pmar, group.by = "Refined family")$RNA
Pmar_pct <- get_pct_matrix(
    object = Pmar,
    vars = rownames(Pmar),
    cells = colnames(Pmar),
    group.by = "Refined family"
)
saveRDS(object = Pmar_exp, file = "0.bin/Pmar_exp.rds")
saveRDS(object = Pmar_pct, file = "0.bin/Pmar_pct.rds")
# The Seurat object is not needed once both matrices are on disk.
rm(Pmar)

“sparse->dense coercion: allocating vector of size 7.6 GiB”


In [5]:
# Lizard (Pvit): per-"Refined family" pseudobulk expression (RNA assay) and
# fraction of expressing cells, both written to 0.bin/.
# The lizard atlas uses NCBI gene identifiers, which still need to be
# converted to Ensembl identifiers; no conversion is performed in this cell.
Pvit <- readRDS(
    file = "/mnt/data01/yuanzhen/01.Vertebrate_cell_evo/01.data/02.atlas_final/2.samap/4.final/Pvit.wb.iter_cluster_annotated.rds"
)
Pvit_exp <- AverageExpression(Pvit, group.by = "Refined family")$RNA
Pvit_pct <- get_pct_matrix(
    object = Pvit,
    vars = rownames(Pvit),
    cells = colnames(Pvit),
    group.by = "Refined family"
)
# Release the Seurat object before writing the results.
rm(Pvit)
saveRDS(object = Pvit_exp, file = "0.bin/Pvit_exp.rds")
saveRDS(object = Pvit_pct, file = "0.bin/Pvit_pct.rds")

“sparse->dense coercion: allocating vector of size 28.1 GiB”


In [7]:
# Mouse (Mmus): per-"Refined family" pseudobulk expression (RNA assay) and
# fraction of expressing cells, both written to 0.bin/.
Mmus <- readRDS(
    file = "/mnt/data01/yuanzhen/01.Vertebrate_cell_evo/01.data/02.atlas_final/2.samap/4.final/Mmus.wb.iter_cluster_annotated.rds"
)
Mmus_exp <- AverageExpression(Mmus, group.by = "Refined family")$RNA
Mmus_pct <- get_pct_matrix(
    object = Mmus,
    vars = rownames(Mmus),
    cells = colnames(Mmus),
    group.by = "Refined family"
)
# Release the Seurat object before writing the results.
rm(Mmus)
saveRDS(object = Mmus_exp, file = "0.bin/Mmus_exp.rds")
saveRDS(object = Mmus_pct, file = "0.bin/Mmus_pct.rds")

“sparse->dense coercion: allocating vector of size 18.0 GiB”


In [8]:
# Human (Hsap): per-"Refined family" pseudobulk expression (RNA assay) and
# fraction of expressing cells, both written to 0.bin/.
Hsap <- readRDS(
    file = "/mnt/data01/yuanzhen/01.Vertebrate_cell_evo/01.data/02.atlas_final/2.samap/4.final/Hsap.wb.iter_cluster_annotated.rds"
)
Hsap_exp <- AverageExpression(Hsap, group.by = "Refined family")$RNA
Hsap_pct <- get_pct_matrix(
    object = Hsap,
    vars = rownames(Hsap),
    cells = colnames(Hsap),
    group.by = "Refined family"
)
# Release the Seurat object before writing the results.
rm(Hsap)

saveRDS(object = Hsap_exp, file = "0.bin/Hsap_exp.rds")
saveRDS(object = Hsap_pct, file = "0.bin/Hsap_pct.rds")

“sparse->dense coercion: allocating vector of size 29.3 GiB”


In [9]:
# Amphioxus (Bflo): the atlas contains rRNA and sRNA genes that need to be
# removed. `IDs` is the gene-name lookup table read by convert_IDs(), which
# searches column V1 and returns the matching entry of column V2.
# NOTE(review): with header = TRUE the column names are taken from the first
# line of the file, so that line presumably reads "V1 V2" — confirm; otherwise
# IDs$V1 is NULL and no name is ever converted.
IDs <- read.table('/mnt/data01/yuanzhen/01.Vertebrate_cell_evo/01.data/02.atlas/amphioxus/IDversion', header = TRUE)
# Gene name change for amphioxus.
#
# Replaces every "-" in each name with "_" and then translates the result
# through the lookup table: a name found in column V1 is replaced by the
# corresponding entry of column V2, any other name is returned with only the
# dash replacement applied.
#
# Args:
#   vectors: character vector of gene names.
#   id_map:  data frame with columns V1 (name to look up) and V2 (replacement).
#            Defaults to the global `IDs` table.
#
# Returns:
#   A character vector of the same length as `vectors`, named by the original
#   input values (or by names(vectors) when the input is already named).
convert_IDs <- function(vectors, id_map = IDs){
    cleaned <- gsub(pattern = "-", replacement = "_", x = vectors, fixed = TRUE)

    # One vectorised lookup instead of scanning the table once per name.
    # match() returns the first hit, so a duplicated key in V1 no longer
    # aborts the conversion; NA names are never matched.
    hit <- match(x = cleaned, table = id_map$V1, incomparables = NA)
    found <- !is.na(hit)

    out <- as.character(cleaned)
    # as.character() keeps the result a character vector even if V2 is a factor.
    out[found] <- as.character(id_map$V2[hit[found]])

    # Same naming as vapply(): keep existing names, otherwise name the result
    # by the character input itself.
    names(out) <- if (!is.null(names(vectors))) {
        names(vectors)
    } else if (is.character(vectors)) {
        vectors
    } else {
        NULL
    }
    out
}
# Load the amphioxus object stored in T1_stage_subset.rds.
Bflo <- readRDS(
    file = "/mnt/data01/yuanzhen/01.Vertebrate_cell_evo/01.data/02.atlas/amphioxus/T1_stage_subset.rds"
)


In [10]:
# Amphioxus: per-"celltype" pseudobulk expression (RNA assay) and fraction of
# expressing cells.
# NOTE(review): AverageExpression() emits a message that it replaces
# underscores in identity class names with dashes, whereas get_pct_matrix()
# labels its columns with the metadata values as they are, so the column
# names of Bflo_exp and Bflo_pct may differ — verify before matching them.
Bflo_exp <- AverageExpression(Bflo, group.by = "celltype")$RNA
Bflo_pct <- get_pct_matrix(
    object = Bflo,
    vars = rownames(Bflo),
    cells = colnames(Bflo),
    group.by = "celltype"
)
# The Seurat object is not needed after both matrices are computed.
rm(Bflo)

Names of identity class contain underscores ('_'), replacing with dashes ('-')
[90mThis message is displayed once every 8 hours.[39m
“sparse->dense coercion: allocating vector of size 1.4 GiB”


In [11]:
# Translate the amphioxus gene names with convert_IDs(), then drop every gene
# whose converted name contains "RNA" (intended to remove rRNA/sRNA genes).
# NOTE(review): the pattern matches "RNA" anywhere in the name — confirm that
# no gene that should be kept contains it.
# drop = FALSE keeps the result two-dimensional even if a single gene remains.
rownames(Bflo_exp) <- convert_IDs(rownames(Bflo_exp))
Bflo_exp <- Bflo_exp[!grepl(pattern = "RNA", x = rownames(Bflo_exp)), , drop = FALSE]
rownames(Bflo_pct) <- convert_IDs(rownames(Bflo_pct))
Bflo_pct <- Bflo_pct[!grepl(pattern = "RNA", x = rownames(Bflo_pct)), , drop = FALSE]

In [12]:
# Write the filtered amphioxus matrices to 0.bin/.
saveRDS(object = Bflo_exp, file = "0.bin/Bflo_T1_exp.rds")
saveRDS(object = Bflo_pct, file = "0.bin/Bflo_T1_pct.rds")