In [None]:
library(Seurat) # v4.0
library(tidyverse)
library(argparser)
library(SingleR)
#library(celldex)
library(stringr)

In [14]:
# Directory of the 10x-format count matrix that is read with Seurat::Read10X().
matrix_file <- "/SGRNJ06/randd/PROJECT/RD20073101_ScRNA_VDJ/20220425_kmc3/sc-1/05.count/220417014_matrix_10X/"

In [15]:
# Echo the configured input path so it is visible in the notebook output.
matrix_file

In [16]:
# Colour palette: a character vector mixing R colour names and hex codes.
# Order matters for any positional lookup (e.g. one colour per cluster).
# NOTE(review): several entries repeat (e.g. "#FFDAB9", "#CDC673", "#008B8B",
# "#43CD80", "#00FF00", "#CDB5CD"), so two positions can map to the same
# colour. The palette is not used in the visible cells - confirm where it is
# consumed before de-duplicating, since that would shift every later index.
color1 <- c("OrangeRed","SlateBlue3","DarkOrange","GreenYellow","Purple","DarkSlateGray","Gold",
  "DeepPink2","Red4","#4682B4","#FFDAB9","#708090","#836FFF","#CDC673","#CD9B1D","#FF6EB4",
  "#CDB5CD","DarkGreen","#008B8B","#43CD80","#483D8B","#66CD00","#CDC673","#CDAD00","#CD9B9B",
  "#FF8247","#8B7355","#8B3A62","#68228B","#CDB7B5","#CD853F","#6B8E23","#696969","#7B68EE",
  "#9F79EE","#B0C4DE","#7A378B","#66CDAA","#EEE8AA","#00FF00","#EEA2AD","#A0522D","#000080",
  "#E9967A","#00CDCD","#8B4500","#DDA0DD","#EE9572","#EEE9E9","#8B1A1A","#8B8378","#EE9A49",
  "#EECFA1","#8B4726","#8B8878","#EEB4B4","#C1CDCD","#8B7500","#0000FF","#EEEED1","#4F94CD",
  "#6E8B3D","#B0E2FF","#76EE00","#A2B5CD","#548B54","#BBFFFF","#B4EEB4","#00C5CD","#008B8B",
  "#7FFFD4","#8EE5EE","#43CD80","#68838B","#00FF00","#B9D3EE","#9ACD32","#00688B","#FFEC8B",
  "#1C86EE","#CDCD00","#473C8B","#FFB90F","#EED5D2","#CD5555","#CDC9A5","#FFE7BA","#FFDAB9",
  "#CD661D","#CDC5BF","#FF8C69","#8A2BE2","#CD8500","#B03060","#FF6347","#FF7F50","#CD0000",
  "#F4A460","#FFB5C5","#DAA520","#CD6889","#32CD32","#FF00FF","#2E8B57","#CD96CD","#48D1CC",
  "#9B30FF","#1E90FF","#CDB5CD","#191970","#E8E8E8","#FFDAB9")

In [17]:
# Number of leading principal components fed to the neighbour graph, t-SNE
# and UMAP steps (used there as 1:DIMS).
DIMS <- 20

In [18]:
# Resolution value handed to FindClusters().
resolution <- 0.6

In [19]:
# Load the 10x matrix directory; gene.column = 2 selects the second column of
# the features file as the feature names.
matrix_name <- Seurat::Read10X(data.dir = matrix_file, gene.column = 2)

In [20]:
# Build the Seurat object from the count matrix; every other argument is left
# at its default.
rds <- CreateSeuratObject(counts = matrix_name)

“Feature names cannot have underscores ('_'), replacing with dashes ('-')”


In [17]:
# Mitochondrial genes: feature names beginning with "MT-", matched
# case-insensitively.
all_genes <- rownames(rds@assays$RNA@data)
mito.genes <- grep("^MT-", all_genes, ignore.case = TRUE, value = TRUE)

In [18]:
# Per-cell fraction (a 0-1 ratio, not multiplied by 100) of raw counts that
# come from the mitochondrial genes. drop = FALSE keeps the subset a matrix
# even when only one (or no) mitochondrial gene matched.
percent.mito <- local({
  raw_counts <- rds@assays$RNA@counts
  mito_total <- Matrix::colSums(raw_counts[mito.genes, , drop = FALSE])
  mito_total / Matrix::colSums(raw_counts)
})

In [19]:
# Store the mitochondrial fraction as the per-cell metadata column
# "percent.mito".
rds <- AddMetaData(
  object = rds,
  metadata = percent.mito,
  col.name = "percent.mito"
)

In [20]:
# Number of variable features requested from FindVariableFeatures().
nfeatures <- 20000

# Log-normalize the counts with a scale factor of 10000.
rds <- NormalizeData(
  object = rds,
  normalization.method = "LogNormalize",
  scale.factor = 10000
)

In [21]:
# Select the top `nfeatures` highly variable features with the "vst" method.
# mean.cutoff / dispersion.cutoff and mean.function / dispersion.function are
# deliberately not set: they only apply to the "mean.var.plot" / "dispersion"
# selection methods and have no effect with "vst", so passing them would
# wrongly suggest that cutoff filtering takes place.
# NOTE(review): nfeatures is 20000, which may keep nearly every gene -
# confirm this is intended.
rds <- FindVariableFeatures(
  object = rds,
  selection.method = "vst",
  nfeatures = nfeatures
)

In [None]:
# Scale the variable features while regressing out nCount_RNA and the
# mitochondrial fraction.
use.genes <- VariableFeatures(object = rds, assay = "RNA")
rds <- ScaleData(
  object = rds,
  features = use.genes,
  vars.to.regress = c("nCount_RNA", "percent.mito")
)

Regressing out nCount_RNA, percent.mito



In [None]:
# Run PCA on the scaled variable features.
# `do.print` is a Seurat v2 argument that RunPCA() no longer defines, so it
# was silently absorbed by `...`; the current switch for suppressing the
# per-component feature printout is `verbose`.
rds <- RunPCA(object = rds, features = use.genes, verbose = FALSE)

In [None]:
# Build the neighbour graph on the first DIMS principal components,
# recomputing it even if one is already stored on the object.
rds <- FindNeighbors(
  object = rds,
  reduction = "pca",
  dims = seq_len(DIMS),
  force.recalc = TRUE
)

In [None]:
# Cluster the cells at the configured resolution.
rds <- FindClusters(object = rds, resolution = resolution)

In [None]:
# tsne and umap
# t-SNE embedding on the first DIMS principal components.
# `do.fast` is a Seurat v2 argument that RunTSNE() no longer defines (it was
# a no-op passed through `...`), so it is omitted. check_duplicates = FALSE
# is forwarded to the underlying t-SNE routine to skip its duplicate-row check.
rds <- RunTSNE(object = rds, dims = seq_len(DIMS), check_duplicates = FALSE)

In [None]:
# UMAP embedding on the first DIMS principal components.
rds <- RunUMAP(object = rds, dims = seq_len(DIMS))

In [3]:
# Path of the serialized SingleR reference; it is loaded with readRDS().
ref <- "/SGRNJ03/randd/cjj/Script/singleR/MouseRNAseqData.rds"

In [22]:
# SingleR: annotate each cluster with a main cell-type label from the
# reference and copy that label onto every cell.
#
# `ref` starts out as a file path. Load it only while it is still a path, so
# re-running this cell does not call readRDS() on the already-loaded object.
if (is.character(ref)) {
  ref <- readRDS(ref)
}

# Default GetAssayData() output of the Seurat object is used as test data.
df.data <- GetAssayData(rds)

# Cluster-level prediction: one label per identity class of `rds`.
pred.cluster <- SingleR(
  test = df.data,
  ref = ref,
  clusters = Idents(rds),
  labels = ref$label.main
)

# One row per cluster: cluster id and its predicted label.
celltype <- data.frame(
  ClusterID = rownames(pred.cluster),
  celltype = pred.cluster$labels,
  stringsAsFactors = FALSE
)

# Broadcast the per-cluster label to each cell through its cluster identity.
rds[["celltype"]] <- celltype$celltype[match(Idents(rds), celltype$ClusterID)]

meta <- rds@meta.data
# select() is namespaced because Bioconductor packages attached after the
# tidyverse can mask dplyr verbs.
# NOTE(review): the variable name `assign` is the same as base::assign();
# kept unchanged in case later cells refer to it.
assign <- meta %>% dplyr::select(celltype)

In [23]:
# Count cells per predicted cell type.
table(meta[["celltype"]])


B cells 
   6219 

In [3]:
# Reload a previously saved object from disk; the following cells use it as a
# Seurat object (GetAssayData(), Idents()).
rds <- readRDS("/SGRNJ06/randd/USER/cjj/celedev/runsingleR/20220429c3/singleR_annotation/220417014/rename.rds")

In [4]:
# Replace the reference path with the loaded reference object.
# Guarded so the cell can be re-run: once `ref` holds the loaded object it is
# no longer a character path and readRDS() would fail on it.
if (is.character(ref)) {
  ref <- readRDS(ref)
}

In [5]:
# SingleR on the reloaded object: predict one main label per cluster, then
# copy that label onto every cell.
df.data <- GetAssayData(rds)
pred.cluster <- SingleR(
  test = df.data,
  ref = ref,
  clusters = Idents(rds),
  labels = ref$label.main
)

# One row per cluster: cluster id and its predicted label.
# FALSE is spelled out because `F` is an ordinary variable that can be
# reassigned.
celltype <- data.frame(
  ClusterID = rownames(pred.cluster),
  celltype = pred.cluster$labels,
  stringsAsFactors = FALSE
)

# Broadcast the per-cluster label to each cell through its cluster identity.
rds[["celltype"]] <- celltype$celltype[match(Idents(rds), celltype$ClusterID)]


In [25]:
# Path of the serialized SingleR reference; it is loaded with readRDS().
ref <- "/SGRNJ03/randd/cjj/Script/singleR/MouseRNAseqData.rds"

In [26]:
# Replace the reference path with the loaded reference object.
# Guarded so the cell can be re-run: once `ref` holds the loaded object it is
# no longer a character path and readRDS() would fail on it.
if (is.character(ref)) {
  ref <- readRDS(ref)
}

In [27]:
# Print the loaded reference object to inspect its class, dimensions and
# colData columns.
ref

class: SummarizedExperiment 
dim: 21214 358 
metadata(0):
assays(1): logcounts
rownames(21214): Xkr4 Rp1 ... LOC100039574 LOC100039753
rowData names(0):
colnames(358): ERR525589Aligned ERR525592Aligned ... SRR1044043Aligned
  SRR1044044Aligned
colData names(3): label.main label.fine label.ont

In [None]:
# Show the main (coarse) label of every reference sample.
ref$label.main

In [None]:
# Show the fine-grained label of every reference sample.
ref$label.fine

In [28]:
# Count reference samples per main label.
table(ref$label.main)


       Adipocytes        Astrocytes           B cells    Cardiomyocytes 
               13                27                 5                 8 
  Dendritic cells Endothelial cells  Epithelial cells      Erythrocytes 
                2                12                 2                 3 
      Fibroblasts      Granulocytes       Hepatocytes       Macrophages 
               45                15                 4                32 
        Microglia         Monocytes           Neurons          NK cells 
               72                 6                64                18 
 Oligodendrocytes           T cells 
               12                18 

In [None]:
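# Label-mapping note (presumably labels from another reference mapped onto this reference's naming - TODO confirm): B_cell, Pre-B_cell_CD34-, Pro-B_cell_CD34+ -> B cells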

In [None]:
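# Label-mapping note (presumably a label from another reference mapped onto this reference's naming - TODO confirm): T_cells -> T cells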