In [None]:
### Analysis of single cell RNAseq and cell-surface antibody expression
### Initial processing via PIPseeker v0.52 (Fluent) and matrix files uploaded for downstream analyses

In [None]:
library(Seurat)
library(tidyverse)
library(progeny)
library(fgsea)
library(dorothea)
library(ggplot2)
library(openxlsx)
library(HGNChelper)
library(clustifyr)
library(cowplot)
library(corrplot)
library(metap)
library(pheatmap)
library(CytoTRACE)
library(colorspace)

In [2]:
#Set Filepaths and Load Data
samplename = [] #set sample name
read_data <- function(samplename) { 
    filepathbarcodes <- paste0("/home/", samplename, "/barcodes_", samplename, '_.tsv')
    filepathfeatures <- paste0("/home/", samplename, "/genes_", samplename, '_.tsv')
    filepathmat <- paste0("/home/", samplename, "/matrix_", samplename, '_.mtx')
  
    barcodes <- read.delim(filepathbarcodes, header=F)
    features <- read.delim(filepathfeatures, header=F)
    mat <- Matrix::readMM(filepathmat)
  
    colnames(mat) <- barcodes[,1]
    rownames(mat) <- make.unique(features$V2)
    patient.data <- mat
    
    seurat1 <- CreateSeuratObject(counts = patient.data, project = samplename)
    
    seurat1[['percent.mito']] <- PercentageFeatureSet(seurat1, pattern = 'MT-')
    seurat1 <- subset(seurat1, subset = percent.mito < 15 & nFeature_RNA > 200 & nFeature_RNA < 2500)
    
    seurat_RNA <- subset(seurat1, features = rownames(seurat1)[! grepl('^ADT-', x = rownames(seurat1))])
    seurat_ADT <- subset(seurat1, features = rownames(seurat1)[grepl('^ADT-', x = rownames(seurat1))])
    
    return(c(seurat_RNA, seurat_ADT))
}

for (s in sampesnames) {
    paste("seurat","_",s) <- readdata(s)
    paste("seurat","_",s,"_","RNA") <- paste("seurat","_",s)[[1]]
    paste("seurat","_",s,"_","ADT") <- paste("seurat","_",s)[[2]]

}

#Integrate RNA via RPCA
ifnb.list_RNA = list(seurat_RNA[,:])
ifnb.list_ADT = list(seurat_ADT[,:])

ifnb.list_RNA <- lapply(X = ifnb.list2, FUN = function(x) {
    x <- NormalizeData(x, verbose = FALSE)
    x <- FindVariableFeatures(x, selection.method = "vst", nfeatures = 2000)
})

#Integrate ADT data with RPCA, normalization method CLR
ifnb.list_ADT <- lapply(X = ifnb.list_ADT, FUN = function(x) {
    x <- NormalizeData(x, normalization.method = 'CLR', margin = 2)
    x <- FindVariableFeatures(x, selection.method = "vst")
})

Patientfeatures <- SelectIntegrationFeatures(object.list = ifnb.list3)
ifnb.list_ADT <- lapply(X = ifnb.list3, FUN = function(x) {
    x <- ScaleData(x, features = Patientfeatures, verbose = FALSE)
    x <- RunPCA(x, features = Patientfeatures, verbose = FALSE)
})

Patientanchors <- FindIntegrationAnchors(object.list = ifnb.list_ADT, reduction = "rpca", dims = 1:15)
Patient_ALL_ADT <- IntegrateData(anchorset = Patientanchors, dims = 1:15)

#Scale integrated datasets
DefaultAssay(Patient_ALL_ADT) <- "integrated"
Patient_ALL_ADT <- ScaleData(Patient_ALL_ADT, features=rownames(Patient_ALL_ADT))

DefaultAssay(Patient_ALL_RNA) <- "integrated"
Patient_ALL_RNA <- ScaleData(Patient_ALL_RNA, verbose = FALSE)


#PCA, Clustering, and Generation of RNA-derived UMAP (Figure 1E)
#Right
DefaultAssay(Patient_ALL_RNA_ADT) <- "integrated"

Patient_ALL_RNA_ADT <- RunPCA(Patient_ALL_RNA_ADT, npcs = 30, verbose = FALSE)
Patient_ALL_RNA_ADT <- RunUMAP(Patient_ALL_RNA_ADT, reduction = "pca", dims = 1:30)
Patient_ALL_RNA_ADT <- FindNeighbors(Patient_ALL_RNA_ADT, reduction = "pca", dims = 1:30)
Patient_ALL_RNA_ADT <- FindClusters(Patient_ALL_RNA_ADT, resolution = 0.6)

ADToverlayedonRNAUMAP <- FeaturePlot(Patient_ALL_RNA_ADT, features_ADT, cols = c("purple", "yellow"), reduction = 'umap', label = TRUE, combine = TRUE)
#Left
Patient_ALL_RNA_ADT <- SetIdent(Patient_ALL_RNA_ADT, value = Patient_ALL_RNA_ADT@meta.data$new_clusters)

cellsbytype <- DimPlot(Patient_ALL_RNA_ADT, group.by = "ident", label = TRUE) + ggtitle('Cells by Type') + NoAxes()
cellsbypatient <- DimPlot(Patient_ALL_RNA_ADT, group.by = "patient") + ggtitle('Cells by Patient') + NoAxes(
options(repr.plot.width=16, repr.plot.height=8)
cellsbytype + cellsbypatient
    
#Differential Gene Expression (Figure 1F)
DefaultAssay(Patient_ALL_RNA_ADT) <- "RNA"
listofclusters <- c("Leukemia", 'B', 'T', "Plasma B", 'Erythroid', 'NK', 
                    'Myeloid DC', 'pDC', 'CD16+ Monocytes') 
    
for (i in 0:(length(listofclusters)-1))
{
    LIST.CLUSTERS.and.MARKERS[[i+1]] <- FindConservedMarkers(Patient_ALL_RNA_ADT, ident.1 = listofclusters[[i+1]], grouping.var = "orig.ident", only.pos = TRUE)
}
LIST.CLUSTERS.and.MARKERS
    
new$cluster <- factor(new$cluster, levels = listofclusters)
new <- new[order(new$cluster), ]

heatmapconservedmarkers <- DoHeatmap(subset(Patient_ALL_RNA_ADT, downsample = 5000),features = new$gene)
    
    
#Define ADT Clusters and Overlay on RNA-derived UMAP (Figure 4A)
DefaultAssay(Patient_ALL_RNA_ADT) <- "ADTonly"
    
Patient_ALL_RNA_ADT <- SetIdent(Patient_ALL_RNA_ADT, value = Patient_ALL_RNA_ADT@meta.data$ADT_integrated_nn_res.0.6) 
ADT <-  c("ADT-IgG1", "ADT-HLA-DR", "ADT-CD3", "ADT-CD4", "ADT-CD5",
          "ADT-CD7", "ADT-CD10", "ADT-CD11b", "ADT-CD13",
          "ADT-CD14", "ADT-CD19", "ADT-CD22", "ADT-CD30", "ADT-CD33", 
          "ADT-CD34", "ADT-CD45", "ADT-CD56", "ADT-CD64", "ADT-CD117")
    
DotPlot(Patient_ALL_RNA_ADT, features = rev(features_ADT), cols = c("blue", "red"), dot.scale = 8) + RotatedAxis()
    
Patient_ALL_RNA_ADT$ADTclusters <- Idents(Patient_ALL_RNA_ADT)
Patient_ALL_RNA_ADT <- SetIdent(Patient_ALL_RNA_ADT, value = Patient_ALL_RNA_ADT@meta.data$ADTclusters)
ADToverlayedonADTUMAP <- FeaturePlot(Patient_ALL_RNA_ADT, features_ADT,  reduction = 'adt.umap', label = TRUE, combine = TRUE)
    
#Differential Gene Expression of ADT Clusters (Figure 4B)
DefaultAssay(Patient_ALL_RNA_ADT) <- "RNA"
Patient_ALL_RNA_ADT <- SetIdent(Patient_ALL_RNA_ADT, value = Patient_ALL_RNA_ADT@meta.data$ADTclusters)
cluster_markers_RNAADT <- FindAllMarkers(Patient_ALL_RNA_ADT, only.pos = T)
cluster_markers_RNAADT_10 <- cluster_markers_RNAADT %>%
  group_by(cluster) %>%
  top_n(10, avg_log2FC)
    
    
#Cytotrace Analysis (Figure 5A, B, C)
#Downsampled to 3000 cells per leukemia population per patient
Patient_ALL_RNA_ADT <- SetIdent(Patient_ALL_RNA_ADT, value = Patient_ALL_RNA_ADT@meta.data$newclusters)
leukemiacluster <- subset(Patient_ALL_RNA_ADT, idents = 'Leukemia')
    
DefaultAssay(Patient_ALL_RNA_ADT) <- 'RNA'
cyto <- as.matrix(GetAssayData(Patient_ALL_RNA_ADT, "counts"))
results <- CytoTRACE(cyto)
    
cyto_UMAP <- Patient_ALL_RNA_ADTdownsampled@reductions[["umap"]]@cell.embeddings
plotCytoTRACE(
    cyto_obj = results,
    phenotype = NULL,
    emb = cyto_UMAP
)

corrmatrix <- cor(cyto, ADT)
corrplot(corrmatrix, method="color", is.corr=FALSE,  
         pch.cex = 0.9,insig = 'label_sig', pch.col = 'grey20',
         col = diverging_hcl(100, palette = "Blue-Red 2"),
         tl.col = "black", cl.cex = 0.9, cl.ratio = 0.1, cl.pos = 'b', cl.offest = 0.5, cl.length = 3)
    
    
#Split by CytoTRACE score
Patient_ALL_RNA_ADTdownsampled$cyto_clusters <- Patient_ALL_RNA_ADTdownsampled$newclusters
metadata <- Patient_ALL_RNA_ADTdownsampled[[]]
metadata$Primitiveclusters <- as.character(metadata$Primitiveclusters)  

metadata[metadata$newclusters == "low_cytotrace" & metadata$Cytotrace > 0.95, "cyto_clusters"] <- "high_cytotrace"
Patient_ALL_RNA_ADTdownsampled$cyto)clusters <- metadata$cyto_clusters

findmarkers <- FindMarkers(Patient_ALL_RNA_ADTdownsampled, ident.1 = 'high_cytotrace', ident.2 = 'low_cytotrace',
                          logfc.threshold = log(0.25))

subset <- subset(Patient_ALL_RNA_ADTdownsampled, idents = c('high_cytotrace', 'low_cytotrace'))
heatmapfindmarkers <- DoHeatmap(subset, features = row.names(findmarkers))

ERROR: Error in parse(text = x, srcfile = src): <text>:2:14: unexpected '['
1: #Set Filepaths and Load Data
2: samplename = [
                ^
