In [None]:
library(Seurat)
library(DropletUtils)
library(reticulate)
library(sceasy)
library(clustifyr)
library(ggplot2)
library(cowplot)
library(dplyr)
library(matrixStats)
library(Seurat)
library(BiocGenerics)
library(ggplot2)
library(edgeR)
library(ggrepel)
library(reticulate)
library(scmap)
library(scater)
library(plotly)
library(uwot)
library(readr)
library(clustifyr)
library(tidyverse)
library("readr")

### Load data into R

Load the pre-annotated dataset from which the classifier will be built.

In [None]:
# Read the 10x count matrix from disk; gene.column = 1 selects the first
# column of the features file as the gene names.
data_10X <- Read10X(
  data.dir = "/path/to/10x/data",
  gene.column = 1
)

# Wrap the raw counts in a Seurat object.
# NOTE(review): later cells read `celltype` and `Stage` from the object's
# metadata, which is not attached here -- confirm where the annotation is added.
atlas <- CreateSeuratObject(counts = data_10X)

### Create a table of variable features across the dataset

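Select the most variable genes across the dataset, test them for cell-type markers, and save the resulting marker table to disk. The number of variable features and the p-value cut-off can be adjusted here.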

In [None]:
# Use the annotated cell type as the active identity so that markers are
# computed per cell type.
atlas <- SetIdent(atlas, value = atlas@meta.data$celltype)

# Original note: "for first time" -- presumably this step only needs to be run
# once, since the marker table is written to disk below (TODO confirm).
atlas <- FindVariableFeatures(atlas, selection.method = "vst", nfeatures = 6000)

# Test only the variable features and keep positive markers whose p-value is
# below 1e-5. `TRUE` is spelled out because `T` is an ordinary, reassignable
# variable.
markers <- FindAllMarkers(
  object = atlas,
  features = VariableFeatures(atlas),
  only.pos = TRUE,
  return.thresh = 1e-5
)

# Save the marker table as a tab-separated file.
write.table(
  x = markers,
  file = "Atlas_AllMarkers_forScMapclassifier.tsv",
  sep = "\t"
)

### Choose fold-change and adjusted P-value thresholds for classifier creation

In [None]:
# Keep markers with an average log2 fold change above 0.5 and an adjusted
# p-value below 1e-5, then print the filtered table for inspection.
markers.sign <- markers[with(markers, avg_log2FC > 0.5 & p_val_adj < 1e-5), ]
markers.sign

### Create a classifier based on the selected variable features

In [None]:
set.seed(1234567)

# Build one scmap cluster index per stage, stored in a list keyed by stage.
# Stage labels are coerced to character so that `[[` below always indexes the
# list by name (a numeric label would index by position instead).
stages <- as.character(unique(atlas$Stage))
scmap_classifier <- vector("list", length(stages))
names(scmap_classifier) <- stages

# Loop-invariant: subsetting by stage needs Stage as the active identity.
Idents(atlas) <- atlas$Stage

for (Stage in stages) {
  # Restrict to the cells of this stage and switch identities to cell type.
  tmp <- subset(x = atlas, idents = Stage)
  Idents(tmp) <- tmp$celltype

  # Drop cells without a cell type annotation.
  tmp <- subset(x = tmp, cells = colnames(tmp)[!is.na(Idents(tmp))])

  # NOTE(review): the variable features computed here do not appear to be used
  # further down (scmap features are taken from `markers.sign`) -- confirm
  # whether this step is still needed.
  tmp <- FindVariableFeatures(object = tmp, selection.method = "vst", nfeatures = 200) # Choose nFeatures here

  # Convert to a SingleCellExperiment with the fields scmap reads.
  atlas.sce <- as.SingleCellExperiment(x = tmp)
  rowData(atlas.sce)$feature_symbol <- rownames(atlas.sce)
  atlas.sce <- atlas.sce[!duplicated(rownames(atlas.sce)), ]
  counts(atlas.sce) <- as.matrix(counts(atlas.sce))
  logcounts(atlas.sce) <- as.matrix(logcounts(atlas.sce))

  # Use our own chosen features (cell type markers) rather than the default
  # from selectFeatures() (genes with abnormal dropout rate), as results are
  # much more biologically meaningful:
  # atlas.sce <- selectFeatures(atlas.sce, suppress_plot = F, n_features = 1000)
  # A membership mask is used instead of indexing rows by gene name, so marker
  # genes absent from this object are ignored rather than raising a subscript
  # error.
  rowData(atlas.sce)$scmap_features <- rownames(atlas.sce) %in% markers.sign$gene

  # Build the per-cell-type index and keep it under this stage's name.
  atlas.sce <- indexCluster(atlas.sce, cluster_col = "celltype")
  scmap_classifier[[Stage]] <- metadata(atlas.sce)$scmap_cluster_index
}

### Save the classifier

In [None]:
# Persist the per-stage list of scmap cluster indices for later reuse.
saveRDS(scmap_classifier, file = "scmap_classifier_atlas.rds")