In [1]:
# this script is for get proper orthogroups and use meta-gene method to get individual atlas, 
# then merge into a vertebrate-level orthogroup level atlas for plotting
suppressPackageStartupMessages({
    require(Seurat)
    require(dplyr)
})

In [2]:
orthogroups <- read.delim('/mnt/data01/yuanzhen/01.Vertebrate_cell_evo/02.gene_relationships/run4/results/Ortho_pipeline/OrthoFinder/Orthogroups/Orthogroups.tsv')
# at least one copy for 4 species
orthogroups <- orthogroups %>% select(c('Orthogroup', 'Pmar', 'Pvit', 'Mmus', 'Hsap'))  %>% 
    filter(Pmar != '' & Pvit != '' & Mmus != '' & Hsap != '')

In [3]:
# calculate number of genes by calcualting commas in it
count_commas <- function(x) {
  sapply(gregexpr(",", x), function(match) ifelse(match[1] == -1, 0, length(match)))
}
number_genes <- data.frame(apply(orthogroups, c(1,2), count_commas))

# retain orthogroups with only 5 copies max for each four species
orthogroups <- orthogroups[which(number_genes$Pmar <= 4 & number_genes$Pvit <= 4 & number_genes$Mmus <= 4 & number_genes$Hsap <= 4), ]

In [4]:
dim(orthogroups)

In [5]:
meta <- c('DonorID','Refined family', 'Refined subtype', 'Species')
get_metagene_obj <- function(obj, orthogroups, species){
    
    test <- orthogroups %>% dplyr::select(c(Orthogroup, species)) %>% 
        tidyr::separate_rows(species, sep = ",\\s*") %>% as.data.frame()
    test <- test[test[,species] %in% rownames(obj), ]
    
    raw <- GetAssayData(obj, layer = "counts")
    meta_raw <- Matrix.utils::aggregate.Matrix(
        raw[test[,species], ],
        groupings = test$Orthogroup,
        fun = "sum"
    )
    
    normalised <- GetAssayData(obj, layer = "data")
    meta_normalised <- Matrix.utils::aggregate.Matrix(
        normalised[test[,species], ],
        groupings = test$Orthogroup,
        fun = "sum"
    )
    
    seurat_object <- CreateSeuratObject(counts = meta_raw, meta.data = obj@meta.data[,meta])
    seurat_object[["RNA"]] <- CreateAssay5Object(counts = meta_raw, data = meta_normalised)
    return(seurat_object)
}

In [6]:
Pmar <- readRDS('Pmar.wb.iter_cluster_annotated.rds')
Pmar_metagene <- get_metagene_obj(Pmar, orthogroups, "Pmar")
rm(Pmar)
saveRDS(Pmar_metagene, "meta_genes/Pmar.wb.metagene.rds")

“[1m[22mUsing an external vector in selections was deprecated in tidyselect 1.1.0.
[36mℹ[39m Please use `all_of()` or `any_of()` instead.
  # Was:
  data %>% select(species)

  # Now:
  data %>% select(all_of(species))

See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.”


In [7]:
Pvit <- readRDS('Pvit.wb.iter_cluster_annotated.rds')
Pvit_metagene <- get_metagene_obj(Pvit, orthogroups, "Pvit")
rm(Pvit)
saveRDS(Pvit_metagene, "meta_genes/Pvit.wb.metagene.rds")

In [8]:
Mmus <- readRDS('Mmus.wb.iter_cluster_annotated.rds')
Mmus_metagene <- get_metagene_obj(Mmus, orthogroups, "Mmus")
rm(Mmus)
saveRDS(Mmus_metagene, "meta_genes/Mmus.wb.metagene.rds")

In [9]:
Hsap <- readRDS('Hsap.wb.iter_cluster_annotated.rds')
Hsap_metagene <- get_metagene_obj(Hsap, orthogroups, "Hsap")
rm(Hsap)
saveRDS(Hsap_metagene, "meta_genes/Hsap.wb.metagene.rds")

In [13]:
vertebrate_metagene <- merge(Hsap_metagene, y = c(Mmus_metagene, Pvit_metagene, Pmar_metagene))
vertebrate_metagene <- JoinLayers(vertebrate_metagene, overwrite = TRUE)
saveRDS(vertebrate_metagene, "meta_genes/Vertebrate.merged.metagene.rds")

In [None]:
write.table(orthogroups, file = "meta_genes/orthogroups.4vertebrates.txt", sep = '\t', quote = F, 
            row.names = F, col.names = T)

In [5]:
Hsap <- readRDS('meta_genes/Hsap.wb.metagene.rds')
Mmus <- readRDS('meta_genes/Mmus.wb.metagene.rds')
Pvit <- readRDS('meta_genes/Pvit.wb.metagene.rds')

In [7]:
amniote_metagene <- merge(Hsap, y = c(Mmus, Pvit))
amniote_metagene <- JoinLayers(amniote_metagene, overwrite = TRUE)
saveRDS(amniote_metagene, "meta_genes/Amniote.merged.metagene.rds")