In [4]:
library(tidyverse)
library(magrittr)
library(patchwork)

theme_set(theme_bw())

library(Seurat)
library(SingleCellExperiment)
library(batchelor)
library(multisce)
library(scutility)

library(future)
library(furrr)
# Use the multicore future backend with 8 workers for the future/furrr calls
# in this notebook. The strategy function and its arguments are passed
# separately, which is the documented calling convention for plan().
future::plan(future::multicore, workers = 8)


# Fixed seed for reproducibility of RNG use in the main R session.
# NOTE(review): set.seed() here seeds this session only; if code run inside
# future_map() ever relies on random numbers, confirm whether
# furrr_options(seed = ...) is needed as well.
seed <- 124
set.seed(seed)

In [5]:
# Full paths of every entry under data/multisce, named by the entry's base
# name so that lists mapped over this vector carry those names.
path_multisce <- list.files(here::here("data", "multisce"), full.names = TRUE)
names(path_multisce) <- basename(path_multisce)

In [None]:
# For each path in path_multisce (processed through future_map), load the
# object with "RNA" as main_name and no reduced dimensions, convert it to a
# Seurat object and return its 5000 variable features selected with the
# "vst" method. The result is a list of gene-name vectors, one per path.
genes_list <- future_map(path_multisce, function(run_path) {
    rna_sce <- multisce_load(path = run_path, main_name = "RNA", reduceddim_include = c())

    rna_seu <- FindVariableFeatures(
        as.Seurat(rna_sce),
        selection.method = "vst",
        nfeatures = 5000,
        verbose = FALSE
    )

    VariableFeatures(rna_seu)
})

In [5]:
# Turn each run's variable-feature vector into a ranked table after removing
# TR/IG V(D)J-style gene names.
genes_list <- map(genes_list, function(hvg) {
    # Disabled step: only keep genes that are actually in the GRCh38 reference
    #hvg <- hvg[hvg %in% rownames(sce)]

    # Do not include somatically recombined VDJC genes: drop names starting
    # with TR[A/B/G/D] or IG[H/K/L], followed by V/D/J/C and at least one digit
    is_vdjc <- grepl("^TR[ABGD][VDJC][0-9]+|^IG[HKL][VDJC][0-9]+", hvg)
    kept <- hvg[!is_vdjc]

    # rank is the position in the filtered vector (1 = first)
    data.frame(gene = kept, rank = seq_along(kept))
})

# Preview the table for the first run
genes_list[[1]]

gene,rank
<chr>,<int>
DCD,1
MUCL1,2
SCGB2A2,3
IGHG1,4
HBB,5
SCGB1B2P,6
PIP,7
SPRR2E,8
SCGB1D2,9
IGKC,10


In [None]:
# Write each run's ranked gene table to
# data/multisce/<run>/metadata/seurat_hvg.rds, where <run> is the list name.
iwalk(genes_list, function(hvg_table, run_name) {
    out_file <- here::here("data", "multisce", run_name, "metadata", "seurat_hvg.rds")
    saveRDS(hvg_table, file = out_file)
})

In [None]:
# Stack the per-run tables into one data frame; the list names go into "run".
genes <- bind_rows(genes_list, .id = "run")

# Score each row as (largest rank over all runs) - rank, so earlier-ranked
# genes score higher and the overall last rank scores 0.
genes <- mutate(genes, score = max(rank) - rank)

head(genes)

In [None]:
# Sum the per-run scores for every gene and sort from highest to lowest total.
gene_scores <- genes %>%
    group_by(gene) %>%
    summarize(score = sum(score)) %>%
    arrange(desc(score))

head(gene_scores)
dim(gene_scores)

In [None]:
# Export the summed gene scores as a tab-separated file under data/.
# write.table() defaults are kept for everything except the separator.
write.table(
    x = gene_scores,
    file = here::here("data", "gene_scores_5k_sum_skin.tsv"),
    sep = "\t"
)

In [None]:
# Marker genes per skin cell type.
# From https://www.sciencedirect.com/science/article/pii/S107476132030409X
# Each comment sits above the entry it describes; "reference:" gives the
# markers quoted in the original notes, which sometimes differ from the
# genes actually used here.
gating_single <- list(
    # B cells (reference: marked by expression of MS4A1 and CD79A)
    B_cell = c("MS4A1", "CD79A"),
    # fibroblasts (reference: DCN and COL6A2)
    Fibro = c("DCN", "COL6A2"),
    # hair follicles (reference: SOX9)
    HairFollicle = c("SOX9"),
    # keratinocytes (KCs) (reference: KRT5 and KRT1; KRT14 is added here)
    Keratinocyte = c("KRT5", "KRT1", "KRT14"),
    # dendritic cells; the notes quote Langerhans cells (LCs) (reference: CD207)
    Dendritic_cell = c("CD207", "CD1A", "LAMP3"),
    # pDC and myofibroblasts have no quoted reference markers in the notes
    pDC = c("LILRA4"),
    Myofibroblasts = c("ACTA2", "PALLD", "TAGLN"),
    # lymphatic endothelial cells (reference: LYVE1; PDPN is added here)
    Endo_lymph = c("LYVE1", "PDPN"),
    # mast cells (reference: CPA3 and IL1RL1)
    Mast_cell = c("CPA3", "IL1RL1"),
    # melanocytes (reference: MLANA)
    Melanocyte = c("MLANA"),
    # macrophages; the notes quote myeloid cells (reference: CD68 and CTSS),
    # but a different marker set is used here
    Macrophage = c("CD163", "CCL18", "CPVL"),
    # plasma cells (reference: IGHG1; IGKC is added here)
    Plasma_cell = c("IGHG1", "IGKC"),
    # Schwann cells (reference: SCN7A)
    Schwann_cell = c("SCN7A"),
    # sebocytes (reference: DCD)
    Sebocyte = c("DCD"),
    # T cells (reference: CD3D and TRBC2; CD3D and CD3E are used here)
    T_cell = c("CD3D", "CD3E"),
    # venular endothelial cells (ECs) (reference: SELE and CD93; VWF is added here)
    Endo_venular = c("SELE", "CD93", "VWF"),
    # vascular smooth muscle cells (VSMCs) (reference: TAGLN)
    VSMC = c("TAGLN")
)

In [None]:
# Flatten the per-cell-type marker lists into one character vector of lineage
# genes, in the order the cell types are defined in gating_single.
# A gene can be a marker for more than one cell type (TAGLN is listed for both
# Myofibroblasts and VSMC), so duplicates are dropped, keeping the first
# occurrence, to export each gene only once.
lineage_genes <- unique(unlist(gating_single, use.names = FALSE))
lineage_genes

# Export the gene list as a tab-separated file under data/.
write.table(lineage_genes, here::here("data", "skin_lineage_genes.tsv"), sep="\t")