# CITEseq data analysis
*Author: Lena Boehme, Taghon lab, 2023*

## Annotation - new

This script documents the second round of annotation of the CITE-seq data. The entire dataset went through a prior round of annotation and marker discovery (see script 3A_annotation_old), which has provided a reference for the new annotation in terms of individual cell labels and gene/protein expression signatures associated with specific stages.

The annotations defined in this script are the ones used in the HTSA manuscript, old annotations represent those in the bioRxiv preprint.

### Setup

In [None]:
# Notebook-wide defaults: 12 x 6 plot canvas, and a large scipen so that
# numbers are printed in fixed rather than scientific notation.
options(
    repr.plot.width = 12,
    repr.plot.height = 6,
    scipen = 100
)

library(SeuratDisk)
library(Seurat)
library(matrixStats)
library(ggplot2)
library(pheatmap)
library(reshape2)
library(dplyr)
library(tidyr)
library(viridis)
library(RColorBrewer)
library(stringr)
library(ggalluvial)

library(batchelor)
library(BiocParallel)
library(BiocNeighbors)

library(singleCellHaystack)

library(slingshot)
library(SingleCellExperiment)

In [None]:
# Print R and package versions of the current session.
sessionInfo()

In [None]:
# Colour palettes interpolated from the 12-colour RColorBrewer "Paired"
# scheme; the number in each name is the number of colours in the palette.
pal12 <- colorRampPalette(colors = brewer.pal(n = 12, name = "Paired"))(12)
pal24 <- colorRampPalette(colors = brewer.pal(n = 12, name = "Paired"))(24)
pal36 <- colorRampPalette(colors = brewer.pal(n = 12, name = "Paired"))(36)
pal40 <- colorRampPalette(colors = brewer.pal(n = 12, name = "Paired"))(40)
pal60 <- colorRampPalette(colors = brewer.pal(n = 12, name = "Paired"))(60)

In [None]:
#PC cutoff function
#
# Finds the last position at which the share of variation drops by more than
# `diff` percentage points from one PC to the next, and returns the index of
# the PC directly after that drop.
#
#pca: the pca slot in the seurat object (only its numeric @stdev slot is read)
#diff: the % difference in variation between two subsequent PCs to be used as cutoff
#
# Returns a single number. If no pair of subsequent PCs differs by more than
# `diff`, NA is returned and a warning is raised.
PCcutoff <- function(pca, diff){
    stdev <- pca@stdev
    var_pc <- stdev / sum(stdev) * 100 #% of the summed standard deviations contributed by each PC
    #drop in variation between each PC and the next one; head()/tail() avoid
    #the precedence trap of `1:length(x)-1`, which R reads as `(1:length(x))-1`
    diffvar_pc <- head(var_pc, -1) - tail(var_pc, -1)
    above_cutoff <- which(diffvar_pc > diff)
    if (length(above_cutoff) == 0) {
        warning("PCcutoff: no subsequent PCs differ by more than ", diff,
                "%; returning NA", call. = FALSE)
        return(NA_real_)
    }
    max(above_cutoff) + 1
}

We load the newly processed data that has been QC-ed, denoised and integrated. We still have old annotations from RNA-based annotation and the previous iteration of CITE-seq annotation, which we can utilise as reference if needed.

In [None]:
# Read the processed object from the h5seurat file.
seurObj <- LoadH5Seurat("./HTSA_CITE_WNN_corr.h5seurat")

In [None]:
# Use the ADTdsb assay as default from here on.
DefaultAssay(seurObj) <- "ADTdsb"

In [None]:
# Feature names at rows 1-130 and 138-150 of the ADTdsb data matrix.
# NOTE(review): the positions are hard-coded; which features sit at the
# skipped rows 131-137 is not visible here - confirm against the assay.
ABs <- rownames(seurObj[["ADTdsb"]]@data)[c(seq(1, 130), seq(138, 150))]

We have retained RNA- and CITE-based cell annotations for cells that had been included in the previous annotation workflow. We can use these as a rough guideline.

Of note, we had previously observed technical artefacts during annotation, namely antibody aggregates for certain markers that resulted in false-positive staining on a handful of cells. Based on this knowledge the affected cells have already been filtered from this new version of the data set.

In [None]:
options(repr.plot.width = 12, repr.plot.height = 6)

# WNN UMAP coloured by the two earlier annotation columns. The colours are a
# random shuffle of pal60 (no seed is set), so they differ between runs.
DimPlot(
    seurObj,
    reduction = "umap_wnn_mnn",
    group.by = "pred_cell_type_level_4",
    cols = sample(pal60)
) +
    labs(title = "WNN UMAP - broad RNA annotations")
DimPlot(
    seurObj,
    reduction = "umap_wnn_mnn",
    group.by = "anno_CITE_4v5",
    cols = sample(pal60)
) +
    labs(title = "WNN UMAP - previous CITE annotations")

In [None]:
# Cell counts per previous CITE label, then keep a copy of that column under
# a new name so it can be cleaned up without touching the original.
seurObj$anno_CITE_4v5 %>% table
seurObj$anno_CITE_old <- seurObj$anno_CITE_4v5

The CITE annotations still contain several outdated labels that originated in RNA clustering but do not represent true CITE-seq derived cell categorisation. For better old-new comparisons, we replace these labels with the appropriate overarching CITE-seq annotation.

In [None]:
# Use the old CITE labels as identities so that cells can be selected by label.
Idents(seurObj) <- seurObj$anno_CITE_old
# Cells labelled '' or 'nan' become 'unknown'.
seurObj@meta.data[WhichCells(seurObj, idents = c('', 'nan')),'anno_CITE_old'] <- 'unknown'
# The five listed DP(Q) sub-labels are collapsed into 'DP(Q)_rearr'.
# The identities were set before the relabelling above, so the selection here
# still refers to the original labels.
seurObj@meta.data[WhichCells(seurObj, idents = c('DP(Q)_Th2', 'DP(Q)_HSPH1', 'DP(Q)_CD199', 'DP(Q)_CD99_CD31lo', 'DP(Q)_CD99_CD31hi')),'anno_CITE_old'] <- 'DP(Q)_rearr'

In [None]:
# Unique labels (in table() order), named by their position so that the
# positions can be read off for the manual ordering below.
celltypes <- names(table(seurObj$anno_CITE_old))
names(celltypes) <- seq_along(celltypes)
celltypes

In [None]:
# Manual ordering of the labels by position. The index vector covers each of
# the positions 1-40 exactly once.
# NOTE(review): this assumes exactly 40 labels in the order printed above;
# any change to the label set requires updating these indices.
celltypes_ordered <- celltypes[c(39, 6:9, 12, 14, 16, 15, 17, 13, 11, 27, 29, 28, 30, 33, 31, 32, 35:37, 34, 38,2:5, 18, 20, 19, 21, 26, 24, 25, 22, 23, 1, 10, 40)]

In [None]:
# Store the old annotation as a factor with the manual level order.
seurObj$anno_CITE_old <- factor(seurObj$anno_CITE_old, levels=celltypes_ordered)

In [None]:
# Cell counts per label, now in the manual order.
seurObj$anno_CITE_old %>% table

### Cell cycle scoring

Cell cycle scoring was not retained and the updated RNA processing may have shifted the detection of certain genes. We therefore repeat the scoring on the new data to determine the cell cycle stage of each cell.

In [None]:
# Scoring is run with RNA as default assay; the default is switched back to
# ADTdsb afterwards. Identities are left untouched (set.ident = FALSE).
DefaultAssay(seurObj) <- "RNA"

seurObj <- CellCycleScoring(
    seurObj,
    s.features = cc.genes.updated.2019$s.genes,
    g2m.features = cc.genes.updated.2019$g2m.genes,
    set.ident = FALSE
)

DefaultAssay(seurObj) <- "ADTdsb"

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6)

# WNN UMAP coloured by the "Phase" column.
DimPlot(seurObj, reduction = "umap_wnn_mnn", group.by = "Phase") +
    labs(title = "WNN UMAP - Cell cycle phase")

### Rough clustering

To obtain rough subpopulations that can serve as a starting point for annotation, we cluster on the sPCA (previously calculated on the batch corrected WNN UMAP) with low resolution.

In [None]:
# Neighbour graph on the first 15 dimensions of the "spca" reduction (timed).
ptm <- proc.time()

seurObj <- FindNeighbors(seurObj, reduction = "spca", dims = 1:15)

proc.time() - ptm

In [None]:
# Low-resolution clustering on the "RNA_snn" graph (timed); the labels are
# stored in the metadata column "clusters_rough".
# NOTE(review): "RNA_snn" is presumably the graph written by the
# FindNeighbors call in the previous cell - confirm the graph name.
ptm <- proc.time()

seurObj <- FindClusters(
    seurObj,
    graph.name = "RNA_snn",
    algorithm = 4,
    resolution = 0.5,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_rough"
)

proc.time() - ptm

In [None]:
# Cluster sizes.
table(seurObj$clusters_rough)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# The rough clusters shown on the WNN, ADT-only and RNA-only UMAPs.
DimPlot(seurObj, reduction = "umap_wnn_mnn", group.by = "clusters_rough", cols = pal12) +
    labs(title = "WNN UMAP - ADT rough clustering")
DimPlot(seurObj, reduction = "umap_adt_mnn", group.by = "clusters_rough", cols = pal12) +
    labs(title = "ADT UMAP - ADT rough clustering")
DimPlot(seurObj, reduction = "umap_rna_mnn", group.by = "clusters_rough", cols = pal12) +
    labs(title = "RNA UMAP - ADT rough clustering")

In [None]:
options(repr.plot.width = 16, repr.plot.height = 10)

# Surface marker levels (ADTdsb assay) per rough cluster.
VlnPlot(
    seurObj,
    features = c("CD34", "CD8", "CD4", "CD3", "TCRab", "TCRgd", "CD1a", "CD27", "CD69"),
    pt.size = 0,
    group.by = "clusters_rough",
    assay = "ADTdsb",
    cols = pal12
)

We can derive broad cluster annotations based on the expression of a few markers and the composition of the UMAP:

- 7+11: DNs
- 12: B/DC
- 5+8: DP(P)
- 1+3+6: DP(Q)
- 9: Treg
- 2: ab lineage
- 4: SP
- 10: CD8aa/NK/gdT

### DN annotation

We start the annotation process at the most immature T cell precursors (cluster7/11).

In [None]:
# Keep only the cells of rough clusters 7 and 11.
seurObj_DN <- subset(seurObj, subset = clusters_rough %in% c(7, 11))

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# Position of the selected cells on the full-dataset WNN UMAP.
DimPlot(seurObj_DN, reduction = "umap_wnn_mnn", group.by = "clusters_rough", cols = pal12) +
    labs(title = "WNN UMAP - ADT rough clustering")

In [None]:
# ADT modality: variable features, scaling and a 50-component PCA.
seurObj_DN <- FindVariableFeatures(seurObj_DN, assay = "ADTdsb")
seurObj_DN <- ScaleData(seurObj_DN, assay = "ADTdsb")
seurObj_DN <- RunPCA(seurObj_DN, assay = "ADTdsb", npcs = 50, reduction.name = "pca_adt_DN")

In [None]:
# Correct the ADT PCA embeddings across samples with reducedMNN and store the
# corrected matrix as a separate reduction.
MNN <- reducedMNN(
    Embeddings(seurObj_DN, reduction = "pca_adt_DN"),
    batch = seurObj_DN$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)

seurObj_DN[["mnn_adt_DN"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# RNA modality: same three steps as for ADT.
seurObj_DN <- FindVariableFeatures(seurObj_DN, assay = "RNA")
seurObj_DN <- ScaleData(seurObj_DN, assay = "RNA")
seurObj_DN <- RunPCA(seurObj_DN, assay = "RNA", npcs = 50, reduction.name = "pca_rna_DN")

In [None]:
# Same correction for the RNA PCA embeddings.
# NOTE(review): this reduction is given the same key ("mnn_") as the ADT one
# above - confirm how Seurat resolves the duplicate key.
MNN <- reducedMNN(
    Embeddings(seurObj_DN, reduction = "pca_rna_DN"),
    batch = seurObj_DN$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)

seurObj_DN[["mnn_rna_DN"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of dimensions per modality, chosen by PCcutoff with a 0.1 threshold
# on the uncorrected PCAs.
dim_adt <- PCcutoff(seurObj_DN[["pca_adt_DN"]], 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_DN[["pca_rna_DN"]], 0.1)
dim_rna

# Weighted nearest neighbours across the two corrected reductions.
seurObj_DN <- FindMultiModalNeighbors(
    seurObj_DN,
    reduction.list = list("mnn_adt_DN", "mnn_rna_DN"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
# UMAP computed from the weighted neighbour object.
seurObj_DN <- RunUMAP(
    seurObj_DN,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_DN",
    reduction.key = "wnnUMAP_"
)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# Subset-specific WNN UMAP coloured by the rough clusters.
DimPlot(seurObj_DN, reduction = "umap_wnn_DN", group.by = "clusters_rough")

In [None]:
options(repr.plot.width = 12, repr.plot.height = 10)

# Un-prefixed feature names below are looked up in the ADTdsb assay; the
# "rna_"-prefixed ones are taken from the RNA assay.
DefaultAssay(seurObj_DN) <- "ADTdsb"

# order = TRUE (spelled out, since `T` is an ordinary variable that can be
# reassigned) draws the highest-expressing cells on top.
FeaturePlot(
    seurObj_DN,
    features = c("CD34", "CD8", "CD4", "CD44", "TCRab", "TCRgd", "CD1a", "rna_RAG2", "rna_PTCRA"),
    order = TRUE,
    reduction = "umap_wnn_DN"
) & scale_color_viridis()

As seen previously, clustering on this cell subset is not useful for distinguishing biologically different populations, since the clusters don't really align with the cell types we know to be present during early T cell development. Instead we can use manual cut-offs for expression of marker combinations to identify pre-/post-commitment cells.

In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)

# Pairwise marker scatter plots used to pick the manual cut-offs, coloured by
# the "clusters_DN" metadata column.
# NOTE(review): "clusters_DN" is not created anywhere in the visible part of
# this notebook - presumably it is carried over in the loaded object; confirm.
FeatureScatter(seurObj_DN, feature1 = "CD34", feature2 = "CD44", cols = pal24, group.by = "clusters_DN")
FeatureScatter(seurObj_DN, feature1 = "CD34", feature2 = "CD1a", cols = pal24, group.by = "clusters_DN")
FeatureScatter(seurObj_DN, feature1 = "CD1a", feature2 = "CD44", cols = pal24, group.by = "clusters_DN")
FeatureScatter(seurObj_DN, feature1 = "CD4", feature2 = "CD8", cols = pal24, group.by = "clusters_DN")
FeatureScatter(seurObj_DN, feature1 = "CD34", feature2 = "CD4", cols = pal24, group.by = "clusters_DN")


High CD8 levels indicate DP stage, uncommitted cells have high CD44 and low CD1a as well as low CD4.

In [None]:
# Manual gating on marker levels. The rules are applied one after another and
# each one overwrites the label of the cells it matches, so their order matters.
# Cells matched by no rule keep the default label 'unknown_DN'.
seurObj_DN$anno_new <- 'unknown_DN'
# CD8 > 7, CD4 > 7 and CD34 < 50 -> 'unknown_DP'
seurObj_DN$anno_new <- ifelse(colnames(seurObj_DN) %in% WhichCells(seurObj_DN, expression = CD8 > 7 & CD4 > 7 & CD34 < 50), "unknown_DP", seurObj_DN$anno_new)
# CD8 < 7 and CD4 < 7 -> 'committed_CD4neg'
seurObj_DN$anno_new <- ifelse(colnames(seurObj_DN) %in% WhichCells(seurObj_DN, expression = CD8 < 7 & CD4 < 7), "committed_CD4neg", seurObj_DN$anno_new)
# CD8 < 7 and CD4 > 7 -> 'committed_CD4pos'
seurObj_DN$anno_new <- ifelse(colnames(seurObj_DN) %in% WhichCells(seurObj_DN, expression = CD8 < 7 & CD4 > 7), "committed_CD4pos", seurObj_DN$anno_new)
# CD34 > 50, CD44 > 10, CD1a < 8 and CD4 < 7 -> 'uncommitted'. This rule has no
# CD8 condition and runs last, so it overrides any label assigned above.
seurObj_DN$anno_new <- ifelse(colnames(seurObj_DN) %in% WhichCells(seurObj_DN, expression = CD34 > 50 & CD44 > 10 & CD1a < 8 & CD4 < 7), "uncommitted", seurObj_DN$anno_new)

In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)

# The same marker pairs as before, now coloured by the new labels.
FeatureScatter(seurObj_DN, feature1 = "CD34", feature2 = "CD44", cols = pal12, group.by = "anno_new")
FeatureScatter(seurObj_DN, feature1 = "CD34", feature2 = "CD1a", cols = pal12, group.by = "anno_new")
FeatureScatter(seurObj_DN, feature1 = "CD1a", feature2 = "CD44", cols = pal12, group.by = "anno_new")
FeatureScatter(seurObj_DN, feature1 = "CD4", feature2 = "CD8", cols = pal12, group.by = "anno_new")
FeatureScatter(seurObj_DN, feature1 = "CD34", feature2 = "CD4", cols = pal12, group.by = "anno_new")


In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# New labels on the subset-specific WNN UMAP.
DimPlot(seurObj_DN, reduction = "umap_wnn_DN", group.by = "anno_new", cols = pal12)

Using this strategy we can label cells with clear expression combos of known markers. There are cells that do not fit with classic marker categorisation, e.g. cells with high CD44 and CD1a (which should be mutually exclusive at this stage) and CD44hi CD34lo cells. We label these as unknown, since for these the TotalSeq staining may not be reliable enough.

There also seems to be a non-negligible subset of DP thymocytes despite the subsetting for an early cluster.

Since there will be some proliferating cells at this stage, we should distinguish these from quiescent ones in the annotation.

In [None]:
# Labels versus cell cycle phase before splitting off proliferating cells.
table(seurObj_DN$anno_new, seurObj_DN$Phase)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj_DN, reduction = "umap_wnn_DN", group.by = "Phase")

In [None]:
# Cells of the two committed populations that are in G2M or S phase get the
# suffix "(P)"; all other labels stay as they are.
seurObj_DN$anno_new <- ifelse(
    seurObj_DN$anno_new == "committed_CD4neg" & seurObj_DN$Phase %in% c("G2M", "S"),
    "committed_CD4neg(P)",
    seurObj_DN$anno_new
)
seurObj_DN$anno_new <- ifelse(
    seurObj_DN$anno_new == "committed_CD4pos" & seurObj_DN$Phase %in% c("G2M", "S"),
    "committed_CD4pos(P)",
    seurObj_DN$anno_new
)

In [None]:
# Same cross-table after the split.
table(seurObj_DN$anno_new, seurObj_DN$Phase)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6)

DimPlot(seurObj_DN, reduction = "umap_wnn_DN", group.by = "anno_new", cols = pal12)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 14)

# Marker levels per new label.
VlnPlot(
    seurObj_DN,
    features = c("CD34", "CD8", "CD4", "CD44", "TCRgd", "CD1a", "CD3", "rna_PTCRA", "rna_TRDC", "rna_TRGC1", "rna_RAG2", "rna_DNTT"),
    cols = pal12,
    group.by = "anno_new",
    pt.size = 0,
    ncol = 4
)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# Pairwise scatter plots of TCRgd, CD199, XCR1 and CD370.
# NOTE(review): the grouping column "clusters_DN" is not created in the
# visible part of this notebook - confirm that it exists in the object.
FeatureScatter(seurObj_DN, group.by = "clusters_DN", feature1 = "CD199", feature2 = "TCRgd")
FeatureScatter(seurObj_DN, group.by = "clusters_DN", feature1 = "XCR1", feature2 = "TCRgd")
FeatureScatter(seurObj_DN, group.by = "clusters_DN", feature1 = "XCR1", feature2 = "CD199")
FeatureScatter(seurObj_DN, group.by = "clusters_DN", feature1 = "CD370", feature2 = "CD199")
FeatureScatter(seurObj_DN, group.by = "clusters_DN", feature1 = "CD370", feature2 = "TCRgd")

Despite previous efforts to remove cells with antibody aggregates, a small subset of CD199+ TCRgd+ CD370+ cells can be seen. Since these represent a potentially misleading technical artefact, we label these cells accordingly.

In [None]:
# Cells with a level above 20 for at least one of TCRgd, CD199 or CD370.
# NOTE(review): the accompanying text describes cells positive for all three
# markers, whereas this expression combines the conditions with OR - confirm
# which is intended.
cells_artefact <- WhichCells(seurObj_DN, expression = TCRgd > 20 | CD199 > 20 | CD370 > 20)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6)

DimPlot(seurObj_DN, reduction = "umap_wnn_DN", cells.highlight = cells_artefact)

In [None]:
# Flagged cells per current label. The cells are matched by column name, as
# in the relabelling step below, because WhichCells returns cell names.
table(seurObj_DN$anno_new, colnames(seurObj_DN) %in% cells_artefact)

In [None]:
# Flagged cells are relabelled, overriding whatever label they had.
seurObj_DN$anno_new <- ifelse(colnames(seurObj_DN) %in% cells_artefact, "unknown_techn", seurObj_DN$anno_new)

In [None]:
table(seurObj_DN$anno_new)

We add the new cell annotations to the full object.

In [None]:
# Every cell starts as "unknown"; the labels of the DN subset are then
# written to the matching cells, located by cell name.
seurObj$anno_new <- "unknown"
seurObj$anno_new[
    match(x = colnames(seurObj_DN), table = colnames(seurObj))
] <- seurObj_DN$anno_new

In [None]:
table(seurObj$anno_new)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6)

DimPlot(seurObj, reduction = "umap_wnn_mnn", group.by = "anno_new", cols = pal12)

In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)

DimPlot(seurObj, reduction = "umap_wnn_mnn", group.by = "clusters_rough", cols = pal12)

### DP annotation

We carry over the 'unknown_DP' fraction labelled in the DN annotation step and combine it with the clusters containing DPs for annotation of these stages.

In [None]:
# Rough clusters 1, 3, 5, 6 and 8 plus the cells labelled "unknown_DP"
# during the DN annotation.
seurObj_DP <- subset(seurObj, subset = clusters_rough %in% c(1, 3, 5, 6, 8) | anno_new == "unknown_DP")

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# Highlight the selected cells on the full WNN UMAP. TRUE is spelled out
# because `T` is an ordinary variable that can be reassigned.
DimPlot(
    seurObj,
    reduction = "umap_wnn_mnn",
    cells.highlight = intersect(colnames(seurObj), colnames(seurObj_DP)),
    shuffle = TRUE,
    sizes.highlight = 2
)

In [None]:
# Composition of the subset by rough cluster and by current label.
table(seurObj_DP$clusters_rough)
table(seurObj_DP$anno_new)

In [None]:
# ADT modality: variable features, scaling and a 50-component PCA.
seurObj_DP <- FindVariableFeatures(seurObj_DP, assay = "ADTdsb")
seurObj_DP <- ScaleData(seurObj_DP, assay = "ADTdsb")
seurObj_DP <- RunPCA(seurObj_DP, assay = "ADTdsb", npcs = 50, reduction.name = "pca_adt_DP")

In [None]:
# Correct the ADT PCA embeddings across samples and store the result.
MNN <- reducedMNN(
    Embeddings(seurObj_DP, reduction = "pca_adt_DP"),
    batch = seurObj_DP$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_DP[["mnn_adt_DP"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# RNA modality: same three steps as for ADT.
seurObj_DP <- FindVariableFeatures(seurObj_DP, assay = "RNA")
seurObj_DP <- ScaleData(seurObj_DP, assay = "RNA")
seurObj_DP <- RunPCA(seurObj_DP, assay = "RNA", npcs = 50, reduction.name = "pca_rna_DP")

In [None]:
# Same correction for the RNA PCA embeddings.
MNN <- reducedMNN(
    Embeddings(seurObj_DP, reduction = "pca_rna_DP"),
    batch = seurObj_DP$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_DP[["mnn_rna_DP"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of dimensions per modality, chosen by PCcutoff with a 0.1 threshold.
dim_adt <- PCcutoff(seurObj_DP[["pca_adt_DP"]], 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_DP[["pca_rna_DP"]], 0.1)
dim_rna

# Weighted nearest neighbours across the two corrected reductions.
seurObj_DP <- FindMultiModalNeighbors(
    seurObj_DP,
    reduction.list = list("mnn_adt_DP", "mnn_rna_DP"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
# UMAP computed from the weighted neighbour object.
seurObj_DP <- RunUMAP(
    seurObj_DP,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_DP",
    reduction.key = "wnnUMAP_"
)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# Subset-specific WNN UMAP coloured by rough cluster, current label and
# cell cycle phase.
DimPlot(seurObj_DP, reduction = "umap_wnn_DP", group.by = "clusters_rough")

DimPlot(seurObj_DP, reduction = "umap_wnn_DP", group.by = "anno_new")

DimPlot(seurObj_DP, reduction = "umap_wnn_DP", group.by = "Phase")

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)

# Marker overview; order = TRUE (spelled out, since `T` can be reassigned)
# draws the highest-expressing cells on top.
FeaturePlot(
    seurObj_DP,
    features = c("CD34", "CD8", "CD4", "CD44", "TCRab", "TCRgd", "CD1a", "CD197", "CD199", "CD3", "CD71", "CD69", "CD5", "rna_RAG2", "rna_DNTT", "rna_PTCRA"),
    order = TRUE,
    ncol = 4,
    reduction = "umap_wnn_DP"
) & scale_color_viridis()

The cells previously grouped with the DNs still separate a lot from the remaining DPs and show higher levels of CD34 but low CD3/TCR. This indicates that they might represent the very earliest DPs.

In [None]:
# Supervised PCA of the RNA assay, guided by the "wsnn" graph.
seurObj_DP <- RunSPCA(seurObj_DP, assay = "RNA", graph = "wsnn")

In [None]:
# Neighbour search on the sPCA, followed by clustering at resolution 1 (timed).
# The cluster labels are stored in "clusters_DP".
# NOTE(review): FindNeighbors is given graph.name "wsnn", the same name as
# the graph used by RunSPCA above - presumably this replaces that graph with
# one built on the sPCA; confirm that this is intended.
ptm <- proc.time()

seurObj_DP <- FindNeighbors(
    seurObj_DP,
    graph.name = "wsnn",
    reduction = "spca",
    dims = 1:max(c(dim_adt, dim_rna)),
    assay = "ADTdsb"
)
seurObj_DP <- FindClusters(
    seurObj_DP,
    graph.name = "wsnn",
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_DP"
)

proc.time() - ptm

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj_DP, reduction = "umap_wnn_DP", group.by = "clusters_DP", cols = pal24, label = TRUE)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 14)

# Marker levels per DP cluster.
VlnPlot(
    seurObj_DP,
    features = c("CD34", "CD8", "CD4", "CD44", "TCRab", "CD1a", "CD3", "CD71", "CD47", "CD2", "CD69", "CD31", "rna_PTCRA", "rna_RAG2", "GPR56", "CD21"),
    ncol = 4,
    group.by = "clusters_DP",
    pt.size = 0.001,
    cols = pal24
)

Cluster 16 is still CD34+ and CD8lo. It also expresses high levels of CD31, PTCRA and doesn't proliferate. Interestingly, CD31 was previously observed to be expressed in immature thymocytes up until the early DP stage and downregulated after beta-selection as cells transition from CD8a+b- to CD8a+b+ DP stage https://www.ncbi.nlm.nih.gov/pmc/articles/PMC5340597/

In [None]:
# Per-cell ratio of CD8A to CD8B values from the RNA data slot. Cells with a
# CD8B value of 0 yield Inf (CD8A > 0) or NaN (CD8A also 0).
seurObj_DP$CD8ab_ratio <- (seurObj_DP@assays$RNA@data['CD8A',])/(seurObj_DP@assays$RNA@data['CD8B',])

In [None]:
# Cells with ratio > 1 versus not, counted per cluster (clusters as rows after
# t). data.frame() renames the logical headers to "FALSE." and "TRUE.".
# NOTE(review): the tables here use "seurat_clusters", presumably identical to
# "clusters_DP" because that was the most recent FindClusters run - confirm.
CD8ab_ratio <- table(seurObj_DP$CD8ab_ratio >1, seurObj_DP$seurat_clusters) %>% t %>% as.data.frame.matrix %>% data.frame
# Fraction of counted cells per cluster with ratio > 1; at this point the
# data frame only holds the two count columns, so rowSums is the cell total.
CD8ab_ratio$prop_ratio_o1 <- (CD8ab_ratio$TRUE.)/rowSums(CD8ab_ratio)
# NOTE(review): the factor levels are hard-coded to 1-17, i.e. this assumes
# that the clustering produced exactly 17 clusters.
CD8ab_ratio$cluster <- factor(rownames(CD8ab_ratio), levels=1:17)
# Fraction of cells per cluster with a CD8A value above 0.
CD8a_expr <- table(seurObj_DP$seurat_clusters, seurObj_DP@assays$RNA@data['CD8A',]>0) %>% as.data.frame.matrix %>% data.frame
CD8ab_ratio$prop_CD8a_expr <- (CD8a_expr$TRUE.)/rowSums(CD8a_expr)
# Fraction of cells per cluster with a CD8B value above 0.
CD8b_expr <- table(seurObj_DP$seurat_clusters, seurObj_DP@assays$RNA@data['CD8B',]>0) %>% as.data.frame.matrix %>% data.frame
CD8ab_ratio$prop_CD8b_expr <- (CD8b_expr$TRUE.)/rowSums(CD8b_expr)

# Long format for plotting: columns 4-6 are cluster, prop_CD8a_expr and
# prop_CD8b_expr (given that both count columns exist); the two proportion
# columns are stacked and relabelled as 'CD8a' / 'CD8b'.
CD8ab_expr <- pivot_longer(CD8ab_ratio[,4:6], cols=2:3, names_to = 'CD8', values_to = 'prop_expr')
CD8ab_expr$CD8 <- ifelse(CD8ab_expr$CD8 == 'prop_CD8a_expr', 'CD8a', 'CD8b')

In [None]:
options(repr.plot.width = 7, repr.plot.height = 5)

# One bar per cluster: fraction of cells with a CD8A:CD8B ratio above 1,
# with a reference line at 0.5.
ggplot(data = CD8ab_ratio, mapping = aes(x = cluster, y = prop_ratio_o1, fill = cluster)) +
    geom_col() +
    geom_hline(yintercept = 0.5) +
    scale_fill_manual(values = pal24) +
    labs(
        y = "Proportion of cells with CD8A:B transcript ratio > 1",
        title = "Proportion of cells with higher CD8A than CD8B transcripts per cluster"
    ) +
    theme_bw() +
    theme(legend.position = "none")

Looking at the CD8a to CD8b transcript ratio in the clusters indeed confirms that the majority of cells in the CD31+ cluster 16 have a CD8a:b ratio of >1, whereas in the other clusters most cells express more CD8b than CD8a.

In [None]:
options(repr.plot.width = 7, repr.plot.height = 5)

# Per cluster: fraction of cells with detected CD8A and CD8B (one point
# each), y axis fixed to 0-1, reference line at 0.5.
ggplot(data = CD8ab_expr, mapping = aes(x = cluster, y = prop_expr, colour = CD8)) +
    geom_point() +
    geom_hline(yintercept = 0.5) +
    ylim(c(0, 1)) +
    labs(
        y = "Percentage of cells within a cluster expressing CD8A/B",
        title = "CD8A and CD8B expression per cluster",
        colour = "CD8 expression"
    ) +
    theme_bw()

Notably, while the proportion of CD8a expressing cells is also lower in cluster 16, the proportion is substantially lower for CD8b, with only 20% of cluster 16 cells expressing CD8b, whereas in all other clusters it is detected in at least 80% of cells. This suggests that cluster 16 (or the majority of cells in this cluster) represents early CD8a+b- DPs that are characterised by CD31 expression.

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

# Scatter plots coloured by DP cluster, without the correlation in the title.
FeatureScatter(seurObj_DP, feature1 = "CD34", feature2 = "CD8ab_ratio", group.by = "clusters_DP", cols = pal24, plot.cor = FALSE)
FeatureScatter(seurObj_DP, feature1 = "CD34", feature2 = "CD4", group.by = "clusters_DP", cols = pal24, plot.cor = FALSE)
FeatureScatter(seurObj_DP, feature1 = "CD34", feature2 = "CD31", group.by = "clusters_DP", cols = pal24, plot.cor = FALSE)
FeatureScatter(seurObj_DP, feature1 = "CD26", feature2 = "CD31", group.by = "clusters_DP", cols = pal24, plot.cor = FALSE)

We annotate the entire cluster 16 as 'DP_early_CD31' cells.

In [None]:
# Select by DP cluster: every cell of cluster "16" is labelled
# "DP_early_CD31"; all other labels are kept.
Idents(seurObj_DP) <- seurObj_DP$clusters_DP
seurObj_DP$anno_new <- ifelse(
    colnames(seurObj_DP) %in% WhichCells(seurObj_DP, idents = "16"),
    "DP_early_CD31",
    seurObj_DP$anno_new
)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj_DP, reduction = "umap_wnn_DP", group.by = "anno_new")

The remaining DPs go through another round of integration and clustering.

In [None]:
# Continue with the cells that are still labelled "unknown" or "unknown_DP".
seurObj_DP2 <- subset(seurObj_DP, subset = anno_new %in% c("unknown", "unknown_DP"))
table(seurObj_DP2$anno_new)

In [None]:
# ADT modality: variable features, scaling and a 50-component PCA.
seurObj_DP2 <- FindVariableFeatures(seurObj_DP2, assay = "ADTdsb")
seurObj_DP2 <- ScaleData(seurObj_DP2, assay = "ADTdsb")
seurObj_DP2 <- RunPCA(seurObj_DP2, assay = "ADTdsb", npcs = 50, reduction.name = "pca_adt_DP2")

In [None]:
# Correct the ADT PCA embeddings across samples and store the result.
MNN <- reducedMNN(
    Embeddings(seurObj_DP2, reduction = "pca_adt_DP2"),
    batch = seurObj_DP2$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_DP2[["mnn_adt_DP2"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# RNA modality: same three steps as for ADT.
seurObj_DP2 <- FindVariableFeatures(seurObj_DP2, assay = "RNA")
seurObj_DP2 <- ScaleData(seurObj_DP2, assay = "RNA")
seurObj_DP2 <- RunPCA(seurObj_DP2, assay = "RNA", npcs = 50, reduction.name = "pca_rna_DP2")

In [None]:
# Same correction for the RNA PCA embeddings.
MNN <- reducedMNN(
    Embeddings(seurObj_DP2, reduction = "pca_rna_DP2"),
    batch = seurObj_DP2$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_DP2[["mnn_rna_DP2"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of dimensions per modality, chosen by PCcutoff with a 0.1 threshold.
dim_adt <- PCcutoff(seurObj_DP2[["pca_adt_DP2"]], 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_DP2[["pca_rna_DP2"]], 0.1)
dim_rna

# Weighted nearest neighbours across the two corrected reductions.
seurObj_DP2 <- FindMultiModalNeighbors(
    seurObj_DP2,
    reduction.list = list("mnn_adt_DP2", "mnn_rna_DP2"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
# UMAP computed from the weighted neighbour object.
seurObj_DP2 <- RunUMAP(
    seurObj_DP2,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_DP2",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA of the RNA assay, guided by the "wsnn" graph.
seurObj_DP2 <- RunSPCA(seurObj_DP2, assay = "RNA", graph = "wsnn")

In [None]:
# Neighbour search on the sPCA, followed by clustering at resolution 1 (timed).
# The cluster labels are stored in "clusters_DP2".
ptm <- proc.time()

seurObj_DP2 <- FindNeighbors(seurObj_DP2, graph.name = "wsnn", reduction = "spca", dims = 1:max(c(dim_adt, dim_rna)), assay = "ADTdsb")
seurObj_DP2 <- FindClusters(seurObj_DP2, graph.name = "wsnn", algorithm = 4, resolution = 1, random.seed = 123, method = "igraph", cluster.name = "clusters_DP2")

proc.time() - ptm

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# Clusters from the previous round (clusters_DP) and from this round
# (clusters_DP2) on the new UMAP, followed by cell cycle phase and CD71.
DimPlot(seurObj_DP2, reduction = "umap_wnn_DP2", group.by = "clusters_DP", cols = pal24, label = TRUE)

DimPlot(seurObj_DP2, reduction = "umap_wnn_DP2", group.by = "clusters_DP2", cols = pal24, label = TRUE)

DimPlot(seurObj_DP2, reduction = "umap_wnn_DP2", group.by = "Phase")

# order = TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
FeaturePlot(seurObj_DP2, features = "CD71", order = TRUE, reduction = "umap_wnn_DP2") + scale_color_viridis()

In [None]:
# Cell cycle phase per cluster.
table(seurObj_DP2$clusters_DP2, seurObj_DP2$Phase)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)

VlnPlot(
    seurObj_DP2,
    features = c("CD3", "TCRab", "rna_RAG2", "rna_DNTT", "CD71", "rna_CD8A", "rna_CD8B"),
    group.by = "clusters_DP2",
    pt.size = 0.001,
    ncol = 4
)

Clusters 2, 6, 8, 14 and 15 are proliferating cells (CD71hi), while the rest are quiescent. The proliferating clusters also show lower levels of CD3, TCRab, CD8, RAG2 and DNTT. Cluster 11 seems to be an intermediate cluster where cells are not really proliferating anymore but rearrangement markers are only slowly increasing.

In [None]:
# Assign labels by clusters_DP2 membership; the three cluster sets are
# disjoint, and cells of any other cluster keep their current label.
Idents(seurObj_DP2) <- seurObj_DP2$clusters_DP2
seurObj_DP2$anno_new <- ifelse(
    colnames(seurObj_DP2) %in% WhichCells(seurObj_DP2, idents = c("2", "6", "8", "14", "15")),
    "DP(P)",
    seurObj_DP2$anno_new
)
seurObj_DP2$anno_new <- ifelse(
    colnames(seurObj_DP2) %in% WhichCells(seurObj_DP2, idents = c("11")),
    "DP(Q)",
    seurObj_DP2$anno_new
)
seurObj_DP2$anno_new <- ifelse(
    colnames(seurObj_DP2) %in% WhichCells(seurObj_DP2, idents = c("1", "3", "4", "5", "7", "9", "10", "12", "13")),
    "DP(Q)_rearr",
    seurObj_DP2$anno_new
)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj_DP2, reduction = "umap_wnn_DP2", group.by = "anno_new", label = TRUE)

In [None]:
# Write the DP labels back to the full object, first from the first DP round
# and then from the second round, which overwrites the cells it contains.
# Target cells are located by name with match(), so the result does not
# depend on the subsets having the same cell order as the full object.
seurObj$anno_new[match(colnames(seurObj_DP), colnames(seurObj))] <- seurObj_DP$anno_new
seurObj$anno_new[match(colnames(seurObj_DP2), colnames(seurObj))] <- seurObj_DP2$anno_new

In [None]:
table(seurObj$anno_new)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj, reduction = "umap_wnn_mnn", group.by = "anno_new", cols = pal12)

### SP annotation

The remaining cells represent (relatively) mature thymocytes and cells of several non-T lineages. The transition window from DP(Q)_rearr through positive selection to CD4 vs. CD8 lineage is difficult to annotate and may require several iterations of adapting cell annotations.

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# Rough clusters on the full WNN UMAP, shown again for reference.
DimPlot(
    seurObj,
    reduction = "umap_wnn_mnn",
    group.by = "clusters_rough",
    cols = pal12
)

The transition from cluster 6 to 2 represents the positive selection step and cluster 2 cells mark the lineage bifurcation. CD4 and CD8 lineage cells cluster together (4). Cluster 9 represents Tregs but contains a small separate subset that appears to be CD8+.

To annotate positive selection and the lineage bifurcation properly, we include the late DP(Q) cells in the SP analysis for fine-tuning and as an expression level reference.

In [None]:
# Cells of rough clusters 6, 2, 4 and 9 that are either still "unknown" or
# labelled "DP(Q)_rearr".
seurObj_SP <- subset(
    seurObj,
    subset = clusters_rough %in% c("6", "2", "4", "9") &
        anno_new %in% c("unknown", "DP(Q)_rearr")
)

In [None]:
table(seurObj_SP$clusters_rough, seurObj_SP$anno_new)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 12)

# CD8 versus CD4, one panel per rough cluster.
# NOTE(review): the facetting variable is taken from the object rather than
# from the plot data, so this presumably relies on the plot keeping the cells
# in object order - confirm.
FeatureScatter(seurObj_SP, feature1 = "CD8", feature2 = "CD4", group.by = "clusters_rough", cols = pal24) +
    facet_wrap(~seurObj_SP$clusters_rough) +
    theme_bw()

In [None]:
# ADT modality: variable features, scaling and a 50-component PCA.
seurObj_SP <- FindVariableFeatures(seurObj_SP, assay = "ADTdsb")
seurObj_SP <- ScaleData(seurObj_SP, assay = "ADTdsb")
seurObj_SP <- RunPCA(seurObj_SP, assay = "ADTdsb", npcs = 50, reduction.name = "pca_adt_SP")

In [None]:
# Correct the ADT PCA embeddings across samples and store the result.
MNN <- reducedMNN(
    Embeddings(seurObj_SP, reduction = "pca_adt_SP"),
    batch = seurObj_SP$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_SP[["mnn_adt_SP"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# RNA modality: same three steps as for ADT.
seurObj_SP <- FindVariableFeatures(seurObj_SP, assay = "RNA")
seurObj_SP <- ScaleData(seurObj_SP, assay = "RNA")
seurObj_SP <- RunPCA(seurObj_SP, assay = "RNA", npcs = 50, reduction.name = "pca_rna_SP")

In [None]:
# Same correction for the RNA PCA embeddings.
MNN <- reducedMNN(
    Embeddings(seurObj_SP, reduction = "pca_rna_SP"),
    batch = seurObj_SP$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_SP[["mnn_rna_SP"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of dimensions per modality, chosen by PCcutoff with a 0.1 threshold.
dim_adt <- PCcutoff(seurObj_SP[["pca_adt_SP"]], 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_SP[["pca_rna_SP"]], 0.1)
dim_rna

# Weighted nearest neighbours across the two corrected reductions.
seurObj_SP <- FindMultiModalNeighbors(
    seurObj_SP,
    reduction.list = list("mnn_adt_SP", "mnn_rna_SP"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
# UMAP computed from the weighted neighbour object.
seurObj_SP <- RunUMAP(
    seurObj_SP,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_SP",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA of the RNA assay, guided by the "wsnn" graph.
seurObj_SP <- RunSPCA(seurObj_SP, assay = "RNA", graph = "wsnn")

In [None]:
# Neighbour search on the sPCA, followed by clustering at resolution 1 (timed).
# The cluster labels are stored in "clusters_SP".
ptm <- proc.time()

seurObj_SP <- FindNeighbors(
    seurObj_SP,
    graph.name = "wsnn",
    reduction = "spca",
    dims = 1:max(c(dim_adt, dim_rna)),
    assay = "ADTdsb"
)
seurObj_SP <- FindClusters(
    seurObj_SP,
    graph.name = "wsnn",
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_SP"
)

proc.time() - ptm

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# Subset-specific UMAP coloured by rough clusters and by the new SP clusters.
DimPlot(seurObj_SP, reduction = "umap_wnn_SP", group.by = "clusters_rough", cols = pal12)

# label = TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
DimPlot(seurObj_SP, reduction = "umap_wnn_SP", group.by = "clusters_SP", cols = pal24, label = TRUE)

options(repr.plot.width = 12, repr.plot.height = 7)

# Earlier annotations for reference (wider canvas for the longer legends).
DimPlot(seurObj_SP, reduction = "umap_wnn_SP", group.by = "anno_CITE_old", cols = pal36)

DimPlot(seurObj_SP, reduction = "umap_wnn_SP", group.by = "annotation_level_4", cols = pal36)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 17)

# Marker distributions per SP cluster.
RidgePlot(
    seurObj_SP,
    cols = pal24,
    group.by = "clusters_SP",
    features = c("CD3", "TCRab", "CD4", "CD8", "CD69", "CD5", "CD45RA", "CD45RO", "CD127", "CD29", "CD155", "CD226", "CD44", "CD47", "CD25"),
    ncol = 5
)

options(repr.plot.width = 16, repr.plot.height = 8)

# Selected transcripts per SP cluster.
VlnPlot(
    seurObj_SP,
    cols = pal24,
    pt.size = 0,
    features = c("rna_RAG2", "rna_DNTT", "rna_NR4A1", "rna_ID3", "rna_FOXP3", "rna_CD8A", "rna_CD8B"),
    ncol = 4,
    group.by = "clusters_SP"
)

The annotation of these clusters is relatively difficult. We remove Tregs (cluster 9) and the few outlier cells to obtain more clarity about the lineage bifurcation.

In [None]:
# Cells of rough cluster 9 or of SP clusters 9 and 11 are set aside in their
# own object.
seurObj_Treg <- subset(seurObj_SP, subset = clusters_rough == "9" | clusters_SP %in% c("9", "11"))

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# Outlier cells picked by their position on the SP UMAP: first coordinate
# below -6.3 and second coordinate below 0.
# NOTE(review): the embedding columns are addressed as umapwnnsp_1/2 although
# the UMAP was requested with reduction.key "wnnUMAP_" - presumably Seurat
# changed the key when the reduction was stored; confirm the column names.
cells <- Embeddings(seurObj_SP, reduction = "umap_wnn_SP") %>% data.frame %>% subset(., umapwnnsp_1 < (-6.3) & umapwnnsp_2 < 0) %>% rownames

DimPlot(seurObj_SP, reduction = "umap_wnn_SP", cells.highlight = cells)

In [None]:
# Everything except the outliers and the cells set aside above. invert = TRUE
# is spelled out because `T` is an ordinary variable that can be reassigned.
# NOTE(review): the outliers are matched through the "barcode" metadata
# column, while `cells` holds embedding row names - this assumes that
# "barcode" is identical to the cell names; confirm.
seurObj_SP2 <- subset(seurObj_SP, subset = barcode %in% cells | clusters_rough == "9" | clusters_SP %in% c("9", "11"), invert = TRUE)

In [None]:
# ADT modality: variable features -> scaling -> PCA on the SP2 subset
seurObj_SP2 <- FindVariableFeatures(seurObj_SP2, assay = "ADTdsb")
seurObj_SP2 <- ScaleData(seurObj_SP2, assay = "ADTdsb")
seurObj_SP2 <- RunPCA(
    seurObj_SP2,
    assay = "ADTdsb",
    npcs = 50,
    reduction.name = "pca_adt_SP2"
)

In [None]:
# Correct the ADT PCA embedding across samples with reducedMNN and store it as a reduction
MNN <- reducedMNN(
    Embeddings(seurObj_SP2, reduction = "pca_adt_SP2"),
    batch = seurObj_SP2$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)
seurObj_SP2[["mnn_adt_SP2"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# RNA modality: same steps as for ADT
seurObj_SP2 <- FindVariableFeatures(seurObj_SP2, assay = "RNA")
seurObj_SP2 <- ScaleData(seurObj_SP2, assay = "RNA")
seurObj_SP2 <- RunPCA(
    seurObj_SP2,
    assay = "RNA",
    npcs = 50,
    reduction.name = "pca_rna_SP2"
)

In [None]:
# NOTE(review): this reduction reuses the key "mnn_" of the ADT reduction above;
# Seurat may rename duplicate keys - confirm if embedding column names matter downstream
MNN <- reducedMNN(
    Embeddings(seurObj_SP2, reduction = "pca_rna_SP2"),
    batch = seurObj_SP2$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)
seurObj_SP2[["mnn_rna_SP2"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of dimensions per modality, chosen with the PCcutoff helper (0.1 % drop between PCs)
dim_adt <- PCcutoff(seurObj_SP2[["pca_adt_SP2"]], 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_SP2[["pca_rna_SP2"]], 0.1)
dim_rna

# Weighted nearest neighbours on the two MNN-corrected embeddings
seurObj_SP2 <- FindMultiModalNeighbors(
    seurObj_SP2,
    reduction.list = list("mnn_adt_SP2", "mnn_rna_SP2"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
seurObj_SP2 <- RunUMAP(
    seurObj_SP2,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_SP2",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA on the RNA assay, guided by the WNN graph
seurObj_SP2 <- RunSPCA(seurObj_SP2, assay = "RNA", graph = "wsnn")

In [None]:
# Time the neighbour search and clustering
ptm <- proc.time()

seurObj_SP2 <- FindNeighbors(
    seurObj_SP2,
    reduction = "spca",
    dims = seq_len(max(dim_adt, dim_rna)),
    assay = "ADTdsb",
    graph.name = "wsnn"
)
seurObj_SP2 <- FindClusters(
    seurObj_SP2,
    graph.name = "wsnn",
    algorithm = 4,
    method = "igraph",
    resolution = 1.2,
    random.seed = 123,
    cluster.name = "clusters_SP2"
)

proc.time() - ptm

In [None]:
# SP2 UMAP coloured by the rough clusters, the previous SP clusters and the new SP2 clusters
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "clusters_rough",
    cols = pal12
)

DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "clusters_SP",
    cols = pal24,
    label = TRUE
)

DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "clusters_SP2",
    cols = pal24,
    label = TRUE
)

# Wider canvas for the two reference annotation columns
options(repr.plot.width = 12, repr.plot.height = 7)

DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "anno_CITE_old",
    cols = pal36
)

DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "annotation_level_4",
    cols = pal36
)

The bifurcation is now clearer, but clustering does not really result in discrete subsets that are associated with bi- or unipotent stages.

In [None]:
# Marker distributions per SP2 cluster (ridge plots, 5 panels per row)
options(repr.plot.width = 20, repr.plot.height = 17)

RidgePlot(
    seurObj_SP2,
    features = c(
        "CD3", "TCRab", "CD4", "CD8", "CD69",
        "CD5", "CD45RA", "CD45RO", "CD127", "CD29",
        "CD155", "CD226", "CD44", "CD27", "CD1a"
    ),
    group.by = "clusters_SP2",
    cols = pal24,
    ncol = 5
)

# Selected rna_-prefixed features per SP2 cluster (violins without points)
options(repr.plot.width = 16, repr.plot.height = 8)

VlnPlot(
    seurObj_SP2,
    features = c(
        "rna_RAG2", "rna_DNTT", "rna_NR4A1", "rna_ID3",
        "rna_FOXP3", "rna_CD8A", "rna_CD8B"
    ),
    group.by = "clusters_SP2",
    cols = pal24,
    pt.size = 0,
    ncol = 4
)

In [None]:
# One UMAP panel per SP2 cluster
options(repr.plot.width = 16, repr.plot.height = 10)

DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "clusters_SP2",
    split.by = "clusters_SP2",
    cols = pal24,
    ncol = 5
) +
    theme_bw()

In [None]:
# Marker-vs-marker scatter plots, one facet per SP2 cluster, coloured by the old CITE annotation
options(repr.plot.width = 16, repr.plot.height = 16)

FeatureScatter(
    seurObj_SP2,
    feature1 = "CD8",
    feature2 = "CD4",
    group.by = "anno_CITE_old",
    cols = pal36
) +
    facet_wrap(~seurObj_SP2$clusters_SP2, ncol = 4) +
    theme_bw()

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)

FeatureScatter(
    seurObj_SP2,
    feature1 = "CD44",
    feature2 = "CD226",
    group.by = "anno_CITE_old",
    cols = pal36
) +
    facet_wrap(~seurObj_SP2$clusters_SP2, ncol = 4) +
    theme_bw()

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)

FeatureScatter(
    seurObj_SP2,
    feature1 = "CD69",
    feature2 = "CD45RA",
    group.by = "anno_CITE_old",
    cols = pal36
) +
    facet_wrap(~seurObj_SP2$clusters_SP2, ncol = 4) +
    theme_bw()

In [None]:
# Cross-tabulate old annotation (rows) against SP2 clusters (columns)
table(seurObj_SP2$anno_CITE_old, seurObj_SP2$clusters_SP2)

Broad assignment of populations:
- 4,7,11,14: DP(Q)_rearr
- 1: pos sel
- 5: pos sel/hilo
- 6+8+12: hilo (8 also contains some CD4/8 lineage cells)
- 10+16: also hilo (16 is very heterogeneous)
- 3,13,17: CD8SP (3+13 partially mixed with hilo and CD4)
- 2+9+15: CD4SP

Filtering on CD4/8 levels necessary for most clusters

6,8,10,12 should be subclustered

In [None]:
# Gate on the SP2 clusters; every cell starts as 'unknown'
Idents(seurObj_SP2) <- seurObj_SP2$clusters_SP2
seurObj_SP2$anno_new2 <- "unknown"

# Each step relabels the cells of the listed clusters that pass the CD4/CD8 gate.
# Steps are applied in sequence, so a later step overwrites an earlier label for the same cell.
seurObj_SP2$anno_new2 <- ifelse(
    colnames(seurObj_SP2) %in% WhichCells(
        seurObj_SP2,
        idents = c("4", "7", "11", "14"),
        expression = CD4 > 5 & CD8 > 8
    ),
    "DP(Q)_rearr",
    seurObj_SP2$anno_new2
)
seurObj_SP2$anno_new2 <- ifelse(
    colnames(seurObj_SP2) %in% WhichCells(
        seurObj_SP2,
        idents = c("1"),
        expression = CD4 > 5
    ),
    "DP_pos_sel",
    seurObj_SP2$anno_new2
)
seurObj_SP2$anno_new2 <- ifelse(
    colnames(seurObj_SP2) %in% WhichCells(
        seurObj_SP2,
        idents = c("5", "6", "8", "10", "12", "16"),
        expression = CD4 > 5 & CD8 < 15
    ),
    "DP_4hi8lo",
    seurObj_SP2$anno_new2
)
seurObj_SP2$anno_new2 <- ifelse(
    colnames(seurObj_SP2) %in% WhichCells(
        seurObj_SP2,
        idents = c("6", "10", "12", "16"),
        expression = CD4 > 5 & CD8 > 5
    ),
    "DP_4hi8lo",
    seurObj_SP2$anno_new2
)
seurObj_SP2$anno_new2 <- ifelse(
    colnames(seurObj_SP2) %in% WhichCells(
        seurObj_SP2,
        idents = c("8"),
        expression = CD4 < 5 & CD8 > 5
    ),
    "SP_CD8",
    seurObj_SP2$anno_new2
)
seurObj_SP2$anno_new2 <- ifelse(
    colnames(seurObj_SP2) %in% WhichCells(
        seurObj_SP2,
        idents = c("13", "3", "17"),
        expression = CD8 > 8
    ),
    "SP_CD8",
    seurObj_SP2$anno_new2
)
seurObj_SP2$anno_new2 <- ifelse(
    colnames(seurObj_SP2) %in% WhichCells(
        seurObj_SP2,
        idents = c("15", "2", "9"),
        expression = CD8 < 8 & CD4 > 5
    ),
    "SP_CD4",
    seurObj_SP2$anno_new2
)

In [None]:
# CD8 vs CD4 per SP2 cluster, coloured by the new labels, to check the gates
options(repr.plot.width = 16, repr.plot.height = 16)

FeatureScatter(
    seurObj_SP2,
    feature1 = "CD8",
    feature2 = "CD4",
    group.by = "anno_new2",
    cols = pal12
) +
    facet_wrap(~seurObj_SP2$clusters_SP2, ncol = 4) +
    theme_bw()

In [None]:
# New labels on the SP2 UMAP
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "anno_new2",
    cols = pal12
)

In [None]:
# One UMAP panel per new label, coloured by SP2 cluster
options(repr.plot.width = 16, repr.plot.height = 10)

DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "clusters_SP2",
    split.by = "anno_new2",
    cols = pal24,
    ncol = 3
) +
    theme_bw()

Comparison with previous annotations:

In [None]:
options(repr.plot.width = 12, repr.plot.height = 8)

# Long table with one row per (barcode, annotation version) for the alluvial plot;
# 'id' links the two rows that stem from the same counted combination
df <- seurObj_SP2@meta.data %>%
    count(barcode, anno_CITE_old, anno_new2) %>%
    mutate(id = row_number()) %>%
    pivot_longer(
        cols = c(anno_CITE_old, anno_new2),
        names_to = "version",
        values_to = "celltype"
    )

# Flows from the old annotation to the new one
ggplot(
    df,
    aes(
        x = version, y = n,
        stratum = celltype, fill = celltype,
        alluvium = id
    )
) +
    geom_stratum(alpha = .5) +
    geom_flow() +
    theme_minimal() +
    scale_fill_manual(values = c(pal36, "grey30"))

#### The CD4hi8lo subset

The transition window from positive selection to a clear lineage phenotype through the CD4hiCD8lo stage is very vague. We can try to refine it by zooming in on this stage.

In [None]:
# Restrict to the cells currently labelled DP_4hi8lo
seurObj_hilo <- subset(seurObj_SP2, subset = (anno_new2 == "DP_4hi8lo"))

In [None]:
# ADT modality: variable features -> scaling -> PCA
seurObj_hilo <- FindVariableFeatures(seurObj_hilo, assay = "ADTdsb")
seurObj_hilo <- ScaleData(seurObj_hilo, assay = "ADTdsb")
seurObj_hilo <- RunPCA(
    seurObj_hilo,
    assay = "ADTdsb",
    npcs = 50,
    reduction.name = "pca_adt_hilo"
)

In [None]:
# Correct the ADT PCA embedding across samples with reducedMNN
MNN <- reducedMNN(
    Embeddings(seurObj_hilo, reduction = "pca_adt_hilo"),
    batch = seurObj_hilo$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)
seurObj_hilo[["mnn_adt_hilo"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# RNA modality: same steps as for ADT
seurObj_hilo <- FindVariableFeatures(seurObj_hilo, assay = "RNA")
seurObj_hilo <- ScaleData(seurObj_hilo, assay = "RNA")
seurObj_hilo <- RunPCA(
    seurObj_hilo,
    assay = "RNA",
    npcs = 50,
    reduction.name = "pca_rna_hilo"
)

In [None]:
MNN <- reducedMNN(
    Embeddings(seurObj_hilo, reduction = "pca_rna_hilo"),
    batch = seurObj_hilo$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)
seurObj_hilo[["mnn_rna_hilo"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of dimensions per modality, chosen with the PCcutoff helper
dim_adt <- PCcutoff(seurObj_hilo[["pca_adt_hilo"]], 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_hilo[["pca_rna_hilo"]], 0.1)
dim_rna

# Weighted nearest neighbours on the two MNN-corrected embeddings
seurObj_hilo <- FindMultiModalNeighbors(
    seurObj_hilo,
    reduction.list = list("mnn_adt_hilo", "mnn_rna_hilo"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
seurObj_hilo <- RunUMAP(
    seurObj_hilo,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_hilo",
    reduction.key = "wnnUMAP_"
)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(
    seurObj_hilo,
    reduction = "umap_wnn_hilo",
    group.by = "clusters_SP2",
    cols = pal12
)

In [None]:
# Second UMAP built from the ADT embedding only.
# NOTE(review): reuses reduction.key "wnnUMAP_" of the WNN UMAP above;
# Seurat may rename duplicate keys - confirm if the axis names matter
seurObj_hilo <- RunUMAP(
    seurObj_hilo,
    assay = "ADTdsb",
    reduction = "mnn_adt_hilo",
    dims = seq_len(dim_adt),
    reduction.name = "umap_adt_hilo",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Time the ADT-based neighbour search and clustering
ptm <- proc.time()

seurObj_hilo <- FindNeighbors(
    seurObj_hilo,
    reduction = "mnn_adt_hilo",
    dims = seq_len(dim_adt),
    assay = "ADTdsb"
)
seurObj_hilo <- FindClusters(
    seurObj_hilo,
    algorithm = 4,
    method = "igraph",
    resolution = 1,
    random.seed = 123,
    cluster.name = "clusters_adt_hilo"
)

proc.time() - ptm

In [None]:
# ADT-based hilo clusters on both the WNN and the ADT-only UMAP
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(
    seurObj_hilo,
    reduction = "umap_wnn_hilo",
    group.by = "clusters_adt_hilo",
    cols = pal24
)
DimPlot(
    seurObj_hilo,
    reduction = "umap_adt_hilo",
    group.by = "clusters_adt_hilo",
    cols = pal24
)

In [None]:
# CD8 vs CD4 per hilo cluster, coloured by SP2 cluster
options(repr.plot.width = 16, repr.plot.height = 16)

FeatureScatter(
    seurObj_hilo,
    feature1 = "CD8",
    feature2 = "CD4",
    group.by = "clusters_SP2",
    cols = pal12
) +
    facet_wrap(~seurObj_hilo$clusters_adt_hilo, ncol = 4) +
    theme_bw()

In [None]:
# Same scatter, coloured by the old CITE annotation
options(repr.plot.width = 16, repr.plot.height = 10)

FeatureScatter(
    seurObj_hilo,
    feature1 = "CD8",
    feature2 = "CD4",
    group.by = "anno_CITE_old",
    cols = pal24
) +
    facet_wrap(~seurObj_hilo$clusters_adt_hilo, ncol = 4) +
    theme_bw()

In [None]:
table(seurObj_hilo$anno_CITE_old, seurObj_hilo$clusters_adt_hilo)

In [None]:

options(repr.plot.width = 8, repr.plot.height = 7)

# Highlight, on the SP2 UMAP, the hilo cells with identity '7' or '10' and CD8 < 8.
# NOTE(review): 'idents' refers to the active identities of seurObj_hilo - presumably the
# clusters_adt_hilo result of the preceding FindClusters call; confirm
DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    cells.highlight = WhichCells(
        seurObj_hilo,
        idents = c("7", "10"),
        expression = CD8 < 8
    )
)

This analysis clearly indicates that a subset of cells labeled 4hi8lo actually seems to belong to the CD4 lineage. We adjust the annotation accordingly.

In [None]:
# Move the hilo cells with identity '7' or '10' and CD8 < 8 to the CD4 lineage label
seurObj_SP2$anno_new2 <- ifelse(
    colnames(seurObj_SP2) %in% WhichCells(
        seurObj_hilo,
        idents = c("7", "10"),
        expression = CD8 < 8
    ),
    "SP_CD4",
    seurObj_SP2$anno_new2
)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "anno_new2",
    cols = pal12
)

#### CD4SP

In [None]:
# Restrict to the cells currently labelled SP_CD4
seurObj_SPCD4 <- subset(seurObj_SP2, subset = (anno_new2 == "SP_CD4"))

In [None]:
# ADT modality: variable features -> scaling -> PCA
seurObj_SPCD4 <- FindVariableFeatures(seurObj_SPCD4, assay = "ADTdsb")
seurObj_SPCD4 <- ScaleData(seurObj_SPCD4, assay = "ADTdsb")
seurObj_SPCD4 <- RunPCA(
    seurObj_SPCD4,
    assay = "ADTdsb",
    npcs = 50,
    reduction.name = "pca_adt_SPCD4"
)

In [None]:
# Correct the ADT PCA embedding across samples with reducedMNN
MNN <- reducedMNN(
    Embeddings(seurObj_SPCD4, reduction = "pca_adt_SPCD4"),
    batch = seurObj_SPCD4$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)
seurObj_SPCD4[["mnn_adt_SPCD4"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# RNA modality: same steps as for ADT
seurObj_SPCD4 <- FindVariableFeatures(seurObj_SPCD4, assay = "RNA")
seurObj_SPCD4 <- ScaleData(seurObj_SPCD4, assay = "RNA")
seurObj_SPCD4 <- RunPCA(
    seurObj_SPCD4,
    assay = "RNA",
    npcs = 50,
    reduction.name = "pca_rna_SPCD4"
)

In [None]:
MNN <- reducedMNN(
    Embeddings(seurObj_SPCD4, reduction = "pca_rna_SPCD4"),
    batch = seurObj_SPCD4$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)
seurObj_SPCD4[["mnn_rna_SPCD4"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of dimensions per modality, chosen with the PCcutoff helper
dim_adt <- PCcutoff(seurObj_SPCD4[["pca_adt_SPCD4"]], 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_SPCD4[["pca_rna_SPCD4"]], 0.1)
dim_rna

# Weighted nearest neighbours on the two MNN-corrected embeddings
seurObj_SPCD4 <- FindMultiModalNeighbors(
    seurObj_SPCD4,
    reduction.list = list("mnn_adt_SPCD4", "mnn_rna_SPCD4"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
seurObj_SPCD4 <- RunUMAP(
    seurObj_SPCD4,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_SPCD4",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Second UMAP built from the ADT embedding only.
# NOTE(review): reuses reduction.key "wnnUMAP_" of the WNN UMAP above;
# Seurat may rename duplicate keys - confirm if the axis names matter
seurObj_SPCD4 <- RunUMAP(
    seurObj_SPCD4,
    assay = "ADTdsb",
    reduction = "mnn_adt_SPCD4",
    dims = seq_len(dim_adt),
    reduction.name = "umap_adt_SPCD4",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Time the ADT-based neighbour search and clustering
ptm <- proc.time()

seurObj_SPCD4 <- FindNeighbors(
    seurObj_SPCD4,
    reduction = "mnn_adt_SPCD4",
    dims = seq_len(dim_adt),
    assay = "ADTdsb"
)
seurObj_SPCD4 <- FindClusters(
    seurObj_SPCD4,
    algorithm = 4,
    method = "igraph",
    resolution = 1,
    random.seed = 123,
    cluster.name = "clusters_SPCD4_adt"
)

proc.time() - ptm

In [None]:
# ADT-only UMAP of the CD4 SP cells: old annotation and new ADT clusters
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(
    seurObj_SPCD4,
    reduction = "umap_adt_SPCD4",
    group.by = "anno_CITE_old",
    cols = pal24
)

DimPlot(
    seurObj_SPCD4,
    reduction = "umap_adt_SPCD4",
    group.by = "clusters_SPCD4_adt",
    cols = pal12
)


In [None]:
# Cross-tabulate old annotation (rows) against the CD4 SP ADT clusters (columns)
table(seurObj_SPCD4$anno_CITE_old, seurObj_SPCD4$clusters_SPCD4_adt)

#### CD8SP

In [None]:
# Restrict to the cells currently labelled SP_CD8
seurObj_SPCD8 <- subset(seurObj_SP2, subset = (anno_new2 == "SP_CD8"))

In [None]:
# ADT modality: variable features -> scaling -> PCA
seurObj_SPCD8 <- FindVariableFeatures(seurObj_SPCD8, assay = "ADTdsb")
seurObj_SPCD8 <- ScaleData(seurObj_SPCD8, assay = "ADTdsb")
seurObj_SPCD8 <- RunPCA(
    seurObj_SPCD8,
    assay = "ADTdsb",
    npcs = 50,
    reduction.name = "pca_adt_SPCD8"
)

In [None]:
# NOTE(review): this table inspects seurObj_SP, not the CD8 subset;
# it does not feed into the CD8 workflow below
table(seurObj_SP$clusters_SP, seurObj_SP$clusters_rough)

In [None]:
# Correct the ADT PCA embedding across samples with reducedMNN
MNN <- reducedMNN(
    Embeddings(seurObj_SPCD8, reduction = "pca_adt_SPCD8"),
    batch = seurObj_SPCD8$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)
seurObj_SPCD8[["mnn_adt_SPCD8"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# RNA modality: same steps as for ADT
seurObj_SPCD8 <- FindVariableFeatures(seurObj_SPCD8, assay = "RNA")
seurObj_SPCD8 <- ScaleData(seurObj_SPCD8, assay = "RNA")
seurObj_SPCD8 <- RunPCA(
    seurObj_SPCD8,
    assay = "RNA",
    npcs = 50,
    reduction.name = "pca_rna_SPCD8"
)

In [None]:
# NOTE(review): seurObj_DN is not created in this section - presumably defined earlier
# in the notebook; this table does not feed into the CD8 workflow
table(seurObj_DN$anno_new, seurObj_DN$anno_CITE_old)

In [None]:
MNN <- reducedMNN(
    Embeddings(seurObj_SPCD8, reduction = "pca_rna_SPCD8"),
    batch = seurObj_SPCD8$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)
seurObj_SPCD8[["mnn_rna_SPCD8"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# NOTE(review): further counts on seurObj_DN, unrelated to the CD8 subset
table(seurObj_DN$anno_new)

table(seurObj_DN$anno_CITE_old)

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)


# Current labels on the SP2 UMAP
DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "anno_new2",
    cols = pal12
)


In [None]:
# Number of dimensions per modality, chosen with the PCcutoff helper
dim_adt <- PCcutoff(seurObj_SPCD8[["pca_adt_SPCD8"]], 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_SPCD8[["pca_rna_SPCD8"]], 0.1)
dim_rna

# Weighted nearest neighbours on the two MNN-corrected embeddings
seurObj_SPCD8 <- FindMultiModalNeighbors(
    seurObj_SPCD8,
    reduction.list = list("mnn_adt_SPCD8", "mnn_rna_SPCD8"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
seurObj_SPCD8 <- RunUMAP(
    seurObj_SPCD8,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_SPCD8",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Second UMAP built from the ADT embedding only.
# NOTE(review): reuses reduction.key "wnnUMAP_" of the WNN UMAP above;
# Seurat may rename duplicate keys - confirm if the axis names matter
seurObj_SPCD8 <- RunUMAP(
    seurObj_SPCD8,
    assay = "ADTdsb",
    reduction = "mnn_adt_SPCD8",
    dims = seq_len(dim_adt),
    reduction.name = "umap_adt_SPCD8",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Time the ADT-based neighbour search and clustering
ptm <- proc.time()

seurObj_SPCD8 <- FindNeighbors(
    seurObj_SPCD8,
    reduction = "mnn_adt_SPCD8",
    dims = seq_len(dim_adt),
    assay = "ADTdsb"
)
seurObj_SPCD8 <- FindClusters(
    seurObj_SPCD8,
    algorithm = 4,
    method = "igraph",
    resolution = 1,
    random.seed = 123,
    cluster.name = "clusters_SPCD8_adt"
)

proc.time() - ptm

In [None]:
# ADT-only UMAP of the CD8 SP cells: old annotation and new ADT clusters
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(
    seurObj_SPCD8,
    reduction = "umap_adt_SPCD8",
    group.by = "anno_CITE_old",
    cols = pal24
)

DimPlot(
    seurObj_SPCD8,
    reduction = "umap_adt_SPCD8",
    group.by = "clusters_SPCD8_adt",
    cols = pal12
)


#### SP substages

We have subset and reintegrated the two T lineages separately to permit a better annotation of subsequent maturation stages.

In [None]:
# ADT clusters of both lineages side by side
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(
    seurObj_SPCD4,
    reduction = "umap_adt_SPCD4",
    group.by = "clusters_SPCD4_adt",
    cols = pal12
)

DimPlot(
    seurObj_SPCD8,
    reduction = "umap_adt_SPCD8",
    group.by = "clusters_SPCD8_adt",
    cols = pal12
)


In [None]:
# Same marker panel for both lineages, grouped by the respective ADT clusters
options(repr.plot.width = 16, repr.plot.height = 8)

VlnPlot(
    seurObj_SPCD4,
    features = c("CD1a", "CD45RA", "CD4", "CD8", "CD69", "CD226", "CD44", "CD27"),
    group.by = "clusters_SPCD4_adt",
    cols = pal12,
    pt.size = 0,
    ncol = 4
)
VlnPlot(
    seurObj_SPCD8,
    features = c("CD1a", "CD45RA", "CD4", "CD8", "CD69", "CD226", "CD44", "CD27"),
    group.by = "clusters_SPCD8_adt",
    cols = pal12,
    pt.size = 0,
    ncol = 4
)


In [None]:
# CD45RA vs CD1a, one facet per ADT cluster
options(repr.plot.width = 16, repr.plot.height = 12)

FeatureScatter(
    seurObj_SPCD4,
    feature1 = "CD45RA",
    feature2 = "CD1a",
    cols = pal12
) +
    facet_wrap(~seurObj_SPCD4$clusters_SPCD4_adt, ncol = 4) +
    theme_bw()

FeatureScatter(
    seurObj_SPCD8,
    feature1 = "CD45RA",
    feature2 = "CD1a",
    cols = pal12
) +
    facet_wrap(~seurObj_SPCD8$clusters_SPCD8_adt, ncol = 4) +
    theme_bw()

Cluster analysis and comparison between the two lineages indicates that the same expression cutoffs for CD1a and CD45RA and informed cluster selection should allow the annotation of matching maturation stages. The mature cells for both lineages clearly form a separate cluster and are thus annotated based on the cluster only without taking into account marker expression.

In [None]:
# CD4 lineage: gate on the ADT clusters; every cell starts as 'unknown'
Idents(seurObj_SPCD4) <- seurObj_SPCD4$clusters_SPCD4_adt
seurObj_SPCD4$anno_new3 <- "unknown"

# immature: listed clusters with CD45RA < 20
seurObj_SPCD4$anno_new3 <- ifelse(
    colnames(seurObj_SPCD4) %in% WhichCells(
        seurObj_SPCD4,
        idents = c("1", "4", "8", "5"),
        expression = CD45RA < 20
    ),
    "SP_CD4_immature",
    seurObj_SPCD4$anno_new3
)
# semimature: listed clusters with CD1a < 6
seurObj_SPCD4$anno_new3 <- ifelse(
    colnames(seurObj_SPCD4) %in% WhichCells(
        seurObj_SPCD4,
        idents = c("2", "3", "6", "7"),
        expression = CD1a < 6
    ),
    "SP_CD4_semimature",
    seurObj_SPCD4$anno_new3
)
# mature: all of cluster 9, without an expression gate
seurObj_SPCD4$anno_new3 <- ifelse(
    colnames(seurObj_SPCD4) %in% WhichCells(seurObj_SPCD4, idents = "9"),
    "SP_CD4_mature",
    seurObj_SPCD4$anno_new3
)


In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)


DimPlot(
    seurObj_SPCD4,
    reduction = "umap_adt_SPCD4",
    group.by = "anno_new3",
    cols = pal12
)


In [None]:
# CD8 lineage: same scheme on the CD8 ADT clusters
Idents(seurObj_SPCD8) <- seurObj_SPCD8$clusters_SPCD8_adt
seurObj_SPCD8$anno_new3 <- "unknown"

# immature: listed clusters with CD45RA < 20 and CD4 < 5
seurObj_SPCD8$anno_new3 <- ifelse(
    colnames(seurObj_SPCD8) %in% WhichCells(
        seurObj_SPCD8,
        idents = c("4", "3", "7"),
        expression = CD45RA < 20 & CD4 < 5
    ),
    "SP_CD8_immature",
    seurObj_SPCD8$anno_new3
)
# semimature: listed clusters with CD1a < 6
seurObj_SPCD8$anno_new3 <- ifelse(
    colnames(seurObj_SPCD8) %in% WhichCells(
        seurObj_SPCD8,
        idents = c("1", "2", "5", "6", "8"),
        expression = CD1a < 6
    ),
    "SP_CD8_semimature",
    seurObj_SPCD8$anno_new3
)
# mature: all of cluster 9, without an expression gate
seurObj_SPCD8$anno_new3 <- ifelse(
    colnames(seurObj_SPCD8) %in% WhichCells(seurObj_SPCD8, idents = "9"),
    "SP_CD8_mature",
    seurObj_SPCD8$anno_new3
)


In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)


DimPlot(
    seurObj_SPCD8,
    reduction = "umap_adt_SPCD8",
    group.by = "anno_new3",
    cols = pal12
)


We can transfer these annotations back to the SP object.

In [None]:
# Write the substage labels back into SP2. The assignment is positional: values are taken
# in the cell order of the subset object, which is assumed to match the order in SP2
seurObj_SP2$anno_new2[
    is.element(colnames(seurObj_SP2), colnames(seurObj_SPCD8))
] <- seurObj_SPCD8$anno_new3
seurObj_SP2$anno_new2[
    is.element(colnames(seurObj_SP2), colnames(seurObj_SPCD4))
] <- seurObj_SPCD4$anno_new3

In [None]:
# Copy the SP2 labels into the parent SP object (cells absent from SP2 keep their value).
# The original cell ran this identical assignment twice; once is sufficient.
# The assignment is positional and assumes SP2 keeps the cell order of SP.
seurObj_SP$anno_new[colnames(seurObj_SP) %in% colnames(seurObj_SP2)] <- seurObj_SP2$anno_new2

In [None]:
# Transferred labels on the SP2 and SP UMAPs
options(repr.plot.width = 9, repr.plot.height = 7)


DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "anno_new2",
    cols = pal12
)

DimPlot(
    seurObj_SP,
    reduction = "umap_wnn_SP",
    group.by = "anno_new",
    cols = pal12
)


In [None]:
# Key markers per label
options(repr.plot.width = 16, repr.plot.height = 4)

VlnPlot(
    seurObj_SP2,
    features = c("CD1a", "CD45RA", "CD4", "CD8"),
    group.by = "anno_new2",
    cols = pal12,
    pt.size = 0.001,
    ncol = 4
)

Some CD8-annotated cells express medium levels of CD4. We label these cells as unknown to avoid crosscontamination between lineages.

In [None]:
# Use the current labels as identities so the CD8 stages can be selected by name
Idents(seurObj_SP2) <- seurObj_SP2$anno_new2

# CD8-labelled cells with CD4 > 5 are reset to 'unknown'
seurObj_SP2$anno_new2 <- ifelse(
    colnames(seurObj_SP2) %in% WhichCells(
        seurObj_SP2,
        idents = c("SP_CD8_immature", "SP_CD8_semimature", "SP_CD8_mature"),
        expression = CD4 > 5
    ),
    "unknown",
    seurObj_SP2$anno_new2
)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 4)

VlnPlot(
    seurObj_SP2,
    features = c("CD1a", "CD45RA", "CD4", "CD8"),
    group.by = "anno_new2",
    cols = pal12,
    pt.size = 0.001,
    ncol = 4
)

In [None]:
# One UMAP panel per label
options(repr.plot.width = 20, repr.plot.height = 8)


DimPlot(
    seurObj_SP2,
    reduction = "umap_wnn_SP2",
    group.by = "anno_new2",
    split.by = "anno_new2",
    cols = pal12,
    ncol = 5
)


Comparison between new and old annotations:

In [None]:
options(repr.plot.width = 12, repr.plot.height = 8)

# Long table with one row per (barcode, annotation version) for the alluvial plot;
# 'id' links the two rows that stem from the same counted combination
df <- seurObj_SP2@meta.data %>%
    count(barcode, anno_CITE_old, anno_new2) %>%
    mutate(id = row_number()) %>%
    pivot_longer(
        cols = c(anno_CITE_old, anno_new2),
        names_to = "version",
        values_to = "celltype"
    )


# Flows from the old annotation to the refined one
ggplot(
    df,
    aes(
        x = version, y = n,
        stratum = celltype, fill = celltype,
        alluvium = id
    )
) +
    geom_stratum(alpha = .5) +
    geom_flow() +
    theme_minimal() +
    scale_fill_manual(values = c(pal36, "grey30"))

In [None]:
# Cell counts per refined label
table(seurObj_SP2$anno_new2)

#### Pseudotime-based annotation refinement

By determining lineage pseudotimes, we can assess the sturdiness of the annotations and - if needed - remove cells associated with the wrong lineage.

In [None]:
# invert = TRUE: keep everything except the 'DP(Q)_rearr' and 'unknown' cells
seurObj_SP3 <- subset(
    seurObj_SP2,
    subset = anno_new2 %in% c("DP(Q)_rearr", "unknown"),
    invert = TRUE
)

In [None]:
# ADT modality: variable features -> scaling -> PCA
seurObj_SP3 <- FindVariableFeatures(seurObj_SP3, assay = "ADTdsb")
seurObj_SP3 <- ScaleData(seurObj_SP3, assay = "ADTdsb")
seurObj_SP3 <- RunPCA(
    seurObj_SP3,
    assay = "ADTdsb",
    npcs = 50,
    reduction.name = "pca_adt_SP3"
)

In [None]:
# Correct the ADT PCA embedding across samples with reducedMNN
MNN <- reducedMNN(
    Embeddings(seurObj_SP3, reduction = "pca_adt_SP3"),
    batch = seurObj_SP3$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)
seurObj_SP3[["mnn_adt_SP3"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# RNA modality: same steps as for ADT
seurObj_SP3 <- FindVariableFeatures(seurObj_SP3, assay = "RNA")
seurObj_SP3 <- ScaleData(seurObj_SP3, assay = "RNA")
seurObj_SP3 <- RunPCA(
    seurObj_SP3,
    assay = "RNA",
    npcs = 50,
    reduction.name = "pca_rna_SP3"
)

In [None]:
MNN <- reducedMNN(
    Embeddings(seurObj_SP3, reduction = "pca_rna_SP3"),
    batch = seurObj_SP3$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)
seurObj_SP3[["mnn_rna_SP3"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of dimensions per modality, chosen with the PCcutoff helper
dim_adt <- PCcutoff(seurObj_SP3[["pca_adt_SP3"]], 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_SP3[["pca_rna_SP3"]], 0.1)
dim_rna

# Weighted nearest neighbours on the two MNN-corrected embeddings
seurObj_SP3 <- FindMultiModalNeighbors(
    seurObj_SP3,
    reduction.list = list("mnn_adt_SP3", "mnn_rna_SP3"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
seurObj_SP3 <- RunUMAP(
    seurObj_SP3,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_SP3",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA on the RNA assay, guided by the WNN graph
seurObj_SP3 <- RunSPCA(seurObj_SP3, assay = "RNA", graph = "wsnn")

In [None]:
library(slingshot)
library(SingleCellExperiment)

In [None]:
# Convert the SP3 Seurat object for use with slingshot
SCE_SP <- as.SingleCellExperiment(seurObj_SP3)

In [None]:
# Store the SP3 WNN UMAP embedding under the name 'UMAP_WNN_SP'
reducedDim(SCE_SP, type = "UMAP_WNN_SP") <- Embeddings(seurObj_SP3, reduction = "umap_wnn_SP3")

In [None]:
# Lineage structure over the annotated cell types, with fixed start and end clusters
lineages <- getLineages(
    reducedDims(SCE_SP)$UMAP_WNN_SP,
    clusterLabels = seurObj_SP3$anno_new2,
    start.clus = "DP_pos_sel",
    end.clus = c("SP_CD4_mature", "SP_CD8_mature"),
    dist.method = "mnn"
)

In [None]:
# Time the curve fitting
ptm <- proc.time()

curves <- getCurves(lineages, extend = "n", stretch = 0)

proc.time() - ptm

In [None]:
# Lookup from annotation label to an integer index (used below to pick plot colours).
# Labels are taken in the order of table(); seq_along() replaces 1:length(), which would
# yield c(1, 0) if there were no labels.
clusters_SP <- names(table(as.character(seurObj_SP3$anno_new2)))
clusters_SP <- setNames(seq_along(clusters_SP), clusters_SP)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 8)

# Cells coloured by annotation; lineage skeleton in black, fitted curves in blue
plot(
    reducedDims(SCE_SP)$UMAP_WNN_SP,
    col = pal12[as.numeric(clusters_SP[as.character(seurObj_SP3$anno_new2)])],
    pch = 16,
    cex = 0.5,
    main = "Start + Ends - mnn"
)
lines(SlingshotDataSet(lineages), col = "black", show.constraints = TRUE)
lines(SlingshotDataSet(curves), col = "blue", show.constraints = TRUE)

In [None]:
# Cluster sequence of each inferred lineage
slingLineages(curves)

In [None]:
# Pseudotime per cell (one column per lineage), plus the cell's annotation looked up by name
pseudotime_SP.df <- data.frame(slingPseudotime(curves))
pseudotime_SP.df$anno <- seurObj_SP3$anno_new2[rownames(pseudotime_SP.df)]

head(pseudotime_SP.df)

In [None]:
# Annotation vs pseudotime for each of the two lineages
options(repr.plot.width = 10, repr.plot.height = 6)

ggplot(pseudotime_SP.df, aes(x = Lineage1, y = anno, colour = anno)) +
    geom_jitter(size = 1) +
    scale_color_manual(values = pal12) +
    labs(
        x = "Pseudotime",
        y = "Celltype",
        title = "CD4 lineage cells ordered by pseudotime"
    ) +
    theme_classic() &
    NoLegend()

ggplot(pseudotime_SP.df, aes(x = Lineage2, y = anno, colour = anno)) +
    geom_jitter(size = 1) +
    scale_color_manual(values = pal12) +
    labs(
        x = "Pseudotime",
        y = "Celltype",
        title = "CD8 lineage cells ordered by pseudotime"
    ) +
    theme_classic() &
    NoLegend()

Pseudotimes constructed on the newly generated UMAP do not make much sense due to the kinks in the curves. Instead, we calculate pseudotimes based on the previous UMAP after removal of DP(Q) and unknown cells.

In [None]:
# Same lineage inference, but on the 'UMAP_WNN_SP2' embedding stored in the SCE object.
# NOTE(review): presumably this is the umap_wnn_SP2 reduction carried over by
# as.SingleCellExperiment - confirm
lineages2 <- getLineages(
    reducedDims(SCE_SP)$UMAP_WNN_SP2,
    clusterLabels = seurObj_SP3$anno_new2,
    start.clus = "DP_pos_sel",
    end.clus = c("SP_CD4_mature", "SP_CD8_mature"),
    dist.method = "mnn"
)

In [None]:
# Time the curve fitting
ptm <- proc.time()

curves2 <- getCurves(lineages2, extend = "n", stretch = 0)

proc.time() - ptm

In [None]:
options(repr.plot.width = 8, repr.plot.height = 8)

# Cells coloured by annotation; lineage skeleton in black, fitted curves in blue
plot(
    reducedDims(SCE_SP)$UMAP_WNN_SP2,
    col = pal12[as.numeric(clusters_SP[as.character(seurObj_SP3$anno_new2)])],
    pch = 16,
    cex = 0.5,
    main = "Start + Ends - mnn"
)
lines(SlingshotDataSet(lineages2), col = "black", show.constraints = TRUE)
lines(SlingshotDataSet(curves2), col = "blue", show.constraints = TRUE)

In [None]:
# Cluster sequence of each lineage for the second fit (curves2). The original cell printed
# the lineages of the first fit ('curves'), not those of the fit used from here on.
slingLineages(curves2)

In [None]:
# Pseudotime per cell from the second fit (one column per lineage)
pseudotime_SP.df2 <- data.frame(slingPseudotime(curves2))
# Look up each cell's annotation by the row names of this data frame; the original
# indexed with the row names of pseudotime_SP.df (the first fit) instead
pseudotime_SP.df2$anno <- seurObj_SP3$anno_new2[rownames(pseudotime_SP.df2)]

pseudotime_SP.df2 %>% head

In [None]:
# Annotation vs pseudotime for each of the two lineages (second fit)
options(repr.plot.width = 10, repr.plot.height = 6)

ggplot(pseudotime_SP.df2, aes(x = Lineage1, y = anno, colour = anno)) +
    geom_jitter(size = 1) +
    scale_color_manual(values = pal12) +
    labs(
        x = "Pseudotime",
        y = "Celltype",
        title = "CD4 lineage cells ordered by pseudotime"
    ) +
    theme_classic() &
    NoLegend()

ggplot(pseudotime_SP.df2, aes(x = Lineage2, y = anno, colour = anno)) +
    geom_jitter(size = 1) +
    scale_color_manual(values = pal12) +
    labs(
        x = "Pseudotime",
        y = "Celltype",
        title = "CD8 lineage cells ordered by pseudotime"
    ) +
    theme_classic() &
    NoLegend()

Both lineage pseudotimes contain some immature cells of the other lineage.

In [None]:
# Add the two lineage pseudotimes to SP2; cells without a pseudotime entry stay NA
seurObj_SP2$spt_CD4 <- NA
seurObj_SP2$spt_CD8 <- NA
seurObj_SP2@meta.data[rownames(pseudotime_SP.df2), "spt_CD4"] <- pseudotime_SP.df2[["Lineage1"]]
seurObj_SP2@meta.data[rownames(pseudotime_SP.df2), "spt_CD8"] <- pseudotime_SP.df2[["Lineage2"]]

In [None]:
# Same columns on the SP3 object
seurObj_SP3$spt_CD4 <- NA
seurObj_SP3$spt_CD8 <- NA
seurObj_SP3@meta.data[rownames(pseudotime_SP.df2), "spt_CD4"] <- pseudotime_SP.df2[["Lineage1"]]
seurObj_SP3@meta.data[rownames(pseudotime_SP.df2), "spt_CD8"] <- pseudotime_SP.df2[["Lineage2"]]

In [None]:
# Pseudotimes on the SP2 UMAP (both objects) and on the SP3 UMAP
options(repr.plot.width = 13, repr.plot.height = 6)

FeaturePlot(
    seurObj_SP2,
    features = c("spt_CD4", "spt_CD8"),
    reduction = "umap_wnn_SP2"
)
FeaturePlot(
    seurObj_SP3,
    features = c("spt_CD4", "spt_CD8"),
    reduction = "umap_wnn_SP2"
)
FeaturePlot(
    seurObj_SP3,
    features = c("spt_CD4", "spt_CD8"),
    reduction = "umap_wnn_SP3"
)

We remove these ambiguous cells that are associated with the pseudotime of the opposite lineage.

In [None]:
# Ambiguous cells: CD8-labelled cells that carry a CD4-lineage pseudotime,
# followed by CD4-labelled cells that carry a CD8-lineage pseudotime
cells_ambig <- c(
    rownames(seurObj_SP2@meta.data)[
        seurObj_SP2@meta.data$anno_new2 %in%
            c("SP_CD8_immature", "SP_CD8_semimature", "SP_CD8_mature") &
            !is.na(seurObj_SP2@meta.data$spt_CD4)
    ],
    rownames(seurObj_SP2@meta.data)[
        seurObj_SP2@meta.data$anno_new2 %in%
            c("SP_CD4_immature", "SP_CD4_semimature", "SP_CD4_mature") &
            !is.na(seurObj_SP2@meta.data$spt_CD8)
    ]
)

In [None]:
# Reset the ambiguous cells to 'unknown'
seurObj_SP2@meta.data[cells_ambig, "anno_new2"] <- "unknown"

In [None]:
# Refined labels vs pseudotime, per lineage
options(repr.plot.width = 10, repr.plot.height = 6)

ggplot(seurObj_SP2@meta.data, aes(x = spt_CD4, y = anno_new2, colour = anno_new2)) +
    geom_jitter(size = 1) +
    scale_color_manual(values = pal12) +
    labs(
        x = "Pseudotime",
        y = "Celltype",
        title = "CD4 lineage cells ordered by pseudotime"
    ) +
    theme_classic() &
    NoLegend()

ggplot(seurObj_SP2@meta.data, aes(x = spt_CD8, y = anno_new2, colour = anno_new2)) +
    geom_jitter(size = 1) +
    scale_color_manual(values = pal12) +
    labs(
        x = "Pseudotime",
        y = "Celltype",
        title = "CD8 lineage cells ordered by pseudotime"
    ) +
    theme_classic() &
    NoLegend()

In [None]:
# Old CITE annotation vs pseudotime, per lineage
options(repr.plot.width = 10, repr.plot.height = 6)

ggplot(seurObj_SP2@meta.data, aes(x = spt_CD4, y = anno_CITE_old, colour = anno_CITE_old)) +
    geom_jitter(size = 1) +
    scale_color_manual(values = pal36) +
    labs(
        x = "Pseudotime",
        y = "Celltype",
        title = "CD4 lineage cells ordered by pseudotime"
    ) +
    theme_classic() &
    NoLegend()

ggplot(seurObj_SP2@meta.data, aes(x = spt_CD8, y = anno_CITE_old, colour = anno_CITE_old)) +
    geom_jitter(size = 1) +
    scale_color_manual(values = pal36) +
    labs(
        x = "Pseudotime",
        y = "Celltype",
        title = "CD8 lineage cells ordered by pseudotime"
    ) +
    theme_classic() &
    NoLegend()

We can transfer the SP annotation to the full object.

In [None]:
# Copy the SP2 labels into the full object. The assignment is positional and assumes
# SP2 keeps the cell order of the full object
seurObj$anno_new[
    is.element(colnames(seurObj), colnames(seurObj_SP2))
] <- seurObj_SP2$anno_new2

In [None]:
# Cell counts per label in the full object
table(seurObj$anno_new)

In [None]:
options(repr.plot.width = 15, repr.plot.height = 10)


DimPlot(
    seurObj,
    reduction = "umap_wnn_mnn",
    group.by = "anno_new",
    cols = pal24,
    shuffle = TRUE,
    raster = FALSE
)

#### Tregs

The Treg cluster clearly stands out and was already assigned previously. We reintegrate these cells and determine the subtypes.

In [None]:
seurObj_Treg <- seurObj_Treg  %>%
            FindVariableFeatures(assay='ADTdsb') %>%
            ScaleData(assay = 'ADTdsb') %>%
            RunPCA(assay = 'ADTdsb', npcs = 50, reduction.name = 'pca_adt_Treg')

In [None]:
# Batch-correct the ADT PCs across samples with reducedMNN() and store the
# corrected embedding as the 'mnn_adt_Treg' reduction.
MNN <- reducedMNN(
    seurObj_Treg[["pca_adt_Treg"]]@cell.embeddings,
    batch = seurObj_Treg$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_Treg[["mnn_adt_Treg"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# Treg-specific RNA PCA: variable features, scaling and 50 PCs on 'RNA'.
seurObj_Treg <- FindVariableFeatures(seurObj_Treg, assay = "RNA")
seurObj_Treg <- ScaleData(seurObj_Treg, assay = "RNA")
seurObj_Treg <- RunPCA(seurObj_Treg, assay = "RNA", npcs = 50,
                       reduction.name = "pca_rna_Treg")

In [None]:
# Batch-correct the RNA PCs across samples with reducedMNN() and store the
# corrected embedding as the 'mnn_rna_Treg' reduction.
# NOTE(review): the key "mnn_" is also used for 'mnn_adt_Treg'; Seurat
# presumably renames one of them on collision — confirm.
MNN <- reducedMNN(
    seurObj_Treg[["pca_rna_Treg"]]@cell.embeddings,
    batch = seurObj_Treg$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_Treg[["mnn_rna_Treg"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of PCs kept per modality, chosen with PCcutoff() at a 0.1 threshold
# on the drop in % variation between consecutive PCs.
dim_adt <- PCcutoff(seurObj_Treg[["pca_adt_Treg"]], diff = 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_Treg[["pca_rna_Treg"]], diff = 0.1)
dim_rna

# Weighted nearest-neighbour graph built from both batch-corrected spaces.
seurObj_Treg <- FindMultiModalNeighbors(
    seurObj_Treg,
    reduction.list = list("mnn_adt_Treg", "mnn_rna_Treg"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
# UMAP computed from the weighted (ADT + RNA) neighbour object.
seurObj_Treg <- RunUMAP(
    seurObj_Treg,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_Treg",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA on the RNA assay guided by the 'wsnn' graph; the resulting 'spca' reduction is used for clustering below
seurObj_Treg <- RunSPCA(seurObj_Treg, assay='RNA', graph='wsnn')

In [None]:
# Cluster the Tregs on the sPCA space (timed with proc.time()).
# NOTE(review): FindNeighbors() receives the single graph name 'wsnn', which is
# also the graph written by FindMultiModalNeighbors(); this presumably replaces
# the WNN graph with an sPCA-based neighbour graph before clustering — confirm
# that this is intended.
ptm <- proc.time()

seurObj_Treg <- FindNeighbors(
    seurObj_Treg,
    reduction = "spca",
    dims = seq_len(max(dim_adt, dim_rna)),
    assay = "ADTdsb",
    graph.name = "wsnn"
)
seurObj_Treg <- FindClusters(
    seurObj_Treg,
    graph.name = "wsnn",
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_Treg"
)

proc.time() - ptm

In [None]:
# ADT-only UMAP on the batch-corrected ADT space. The key is "adtUMAP_" (as in
# the other subset analyses of this notebook) so that it no longer collides
# with the "wnnUMAP_" key already used by 'umap_wnn_Treg'.
seurObj_Treg <- RunUMAP(seurObj_Treg, assay = "ADTdsb", dims = seq_len(dim_adt),
                        reduction = "mnn_adt_Treg",
                        reduction.name = "umap_adt_Treg", reduction.key = "adtUMAP_")

In [None]:
# ADT-only clustering on the batch-corrected ADT space (timed with proc.time()).
ptm <- proc.time()

seurObj_Treg <- FindNeighbors(
    seurObj_Treg,
    reduction = "mnn_adt_Treg",
    dims = seq_len(dim_adt),
    assay = "ADTdsb"
)
seurObj_Treg <- FindClusters(
    seurObj_Treg,
    algorithm = 4,
    resolution = 1.2,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_Treg_adt"
)

proc.time() - ptm

In [None]:
# ADT UMAP: previous annotation (wide panel), then ADT clusters.
options(repr.plot.width = 12, repr.plot.height = 7)

DimPlot(seurObj_Treg, reduction = "umap_adt_Treg",
        group.by = "anno_CITE_old", cols = pal24)

options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj_Treg, reduction = "umap_adt_Treg",
        group.by = "clusters_Treg_adt", cols = pal12)


In [None]:
# WNN UMAP: previous annotation, ADT clusters and sPCA-based clusters.
options(repr.plot.width = 12, repr.plot.height = 7)

DimPlot(seurObj_Treg, reduction = "umap_wnn_Treg",
        group.by = "anno_CITE_old", cols = pal24)

options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj_Treg, reduction = "umap_wnn_Treg",
        group.by = "clusters_Treg_adt", cols = pal12)

DimPlot(seurObj_Treg, reduction = "umap_wnn_Treg",
        group.by = "clusters_Treg", cols = c(pal12, "grey30"), label = TRUE)


Recirculating Tregs clearly cluster separately, for the remaining subsets the identity is not so clear.

In [None]:
# Surface (ADT) markers inspected across Treg clusters. 'CD146' was listed
# twice; the duplicate is dropped so that no panel is drawn twice.
markers_Treg <- c(
    'CD137', 'CD146', 'CD18', 'CD183', 'CD194', 'CD195', 'CD1a', 'CD1c', 'CD2',
    'CD224', 'CD226', 'CD25', 'CD278', 'CD279', 'CD29', 'CD3', 'CD31', 'CD352',
    'CD38', 'CD39', 'CD45RA', 'CD47', 'CD48', 'CD49a', 'CD49d', 'CD49f', 'CD54',
    'CD62L', 'CD7', 'CD71', 'CD73', 'CD8', 'CD4', 'CD95', 'Integrin-B7', 'TIGIT',
    'CD357', 'CD122', 'CD45RO', 'CD196', 'CD134', 'CD103', 'CD152', 'TCRab', 'CD27'
)
# RNA features plotted alongside the surface markers
markers_Treg_rna <- c('FOXP3', 'CD8A', 'CD8B', 'CD40LG', 'NR4A1', 'ITM2A', 'KLF2', 'FOXO1', 'RAG2')

In [None]:
options(repr.plot.width = 16, repr.plot.height = 30)

# Ridge plots of the surface markers (alphabetical order) per ADT cluster.
RidgePlot(
    seurObj_Treg,
    features = sort(markers_Treg),
    group.by = "clusters_Treg_adt",
    cols = c(pal12, "grey30"),
    ncol = 6
)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)

# Violin plots (no points) of the RNA features per ADT cluster.
VlnPlot(
    seurObj_Treg,
    features = sort(markers_Treg_rna),
    assay = "RNA",
    group.by = "clusters_Treg_adt",
    cols = c(pal12, "grey30"),
    ncol = 5,
    pt.size = 0
)

In [None]:
# Cross-tabulate the previous annotation (rows) against the ADT clusters (columns)
table(seurObj_Treg$anno_CITE_old, seurObj_Treg$clusters_Treg_adt)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)

# Marker-pair scatter plots, one facet per ADT cluster, coloured by the
# previous annotation. Plots are printed in the order listed.
invisible(lapply(
    list(
        c("CD4", "CD8"),
        c("CD45RA", "CD1a"),
        c("CD45RO", "CD1a"),
        c("CD27", "CD101"),
        c("CD25", "CD279"),
        c("CD39", "CD224"),
        c("CD4", "CD45RA")
    ),
    function(pair) {
        print(
            FeatureScatter(seurObj_Treg, feature1 = pair[1], feature2 = pair[2],
                           cols = pal24, group.by = "anno_CITE_old") +
                facet_wrap(~seurObj_Treg$clusters_Treg_adt, ncol = 4) +
                theme_bw()
        )
    }
))


Based on cluster analysis and comparison with previous marker finding efforts we can annotate several stages/subsets:

In [None]:
# Annotate Treg subsets from ADT cluster membership plus ADT thresholds.
# WhichCells(idents = ...) filters on the active identities, so they are set
# explicitly to the ADT clustering instead of relying on the side effect of the
# most recent FindClusters() call.
Idents(seurObj_Treg) <- seurObj_Treg$clusters_Treg_adt

# Rules are applied in order; a later rule overwrites an earlier label
# (e.g. the PD1 rule relabels part of cluster 1 after the "mature" rule).
seurObj_Treg$anno_new4 <- 'unknown'
seurObj_Treg$anno_new4 <- ifelse(
    colnames(seurObj_Treg) %in% WhichCells(seurObj_Treg, expression = CD45RA < 20 & CD4 > 5 & CD8 < 10, idents = c('3', '5', '6', '7')),
    "SP_Treg_immature", seurObj_Treg$anno_new4
)
seurObj_Treg$anno_new4 <- ifelse(
    colnames(seurObj_Treg) %in% WhichCells(seurObj_Treg, expression = CD1a < 6 & CD4 > 5 & CD8 < 10, idents = c('1', '2', '4', '8')),
    "SP_Treg_mature", seurObj_Treg$anno_new4
)
seurObj_Treg$anno_new4 <- ifelse(
    colnames(seurObj_Treg) %in% WhichCells(seurObj_Treg, idents = c('9')),
    "SP_Treg_CD8", seurObj_Treg$anno_new4
)
seurObj_Treg$anno_new4 <- ifelse(
    colnames(seurObj_Treg) %in% WhichCells(seurObj_Treg, expression = CD1a < 6 & CD8 < 10 & CD279 > 15 & CD224 > 25, idents = '1'),
    "SP_Treg_PD1", seurObj_Treg$anno_new4
)
seurObj_Treg$anno_new4 <- ifelse(
    colnames(seurObj_Treg) %in% WhichCells(seurObj_Treg, idents = c('10')),
    "SP_Treg_recirc", seurObj_Treg$anno_new4
)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)

# Same kind of marker-pair scatter plots, now coloured by the new Treg labels.
invisible(lapply(
    list(
        c("CD4", "CD8"),
        c("CD45RA", "CD1a"),
        c("CD45RO", "CD1a"),
        c("CD27", "CD101"),
        c("CD25", "CD134"),
        c("CD4", "CD45RA")
    ),
    function(pair) {
        print(
            FeatureScatter(seurObj_Treg, feature1 = pair[1], feature2 = pair[2],
                           cols = pal12, group.by = "anno_new4") +
                facet_wrap(~seurObj_Treg$clusters_Treg_adt, ncol = 4) +
                theme_bw()
        )
    }
))


In [None]:

options(repr.plot.width = 9, repr.plot.height = 7)

# New Treg labels on the ADT and WNN UMAPs, then the previous annotation.
DimPlot(seurObj_Treg, reduction = "umap_adt_Treg",
        group.by = "anno_new4", cols = pal12)

DimPlot(seurObj_Treg, reduction = "umap_wnn_Treg",
        group.by = "anno_new4", cols = pal12)

DimPlot(seurObj_Treg, reduction = "umap_wnn_Treg",
        group.by = "anno_CITE_old", cols = pal24)


In [None]:
options(repr.plot.width = 12, repr.plot.height = 8)

# Alluvial comparison of old vs. new Treg labels. Rows are counted per
# barcode/label combination (presumably one row per cell — confirm barcodes are
# unique), then both annotation columns are stacked into long format. The
# columns are selected by name rather than by position.
df <- count(seurObj_Treg@meta.data, barcode, anno_CITE_old, anno_new4) %>%
    mutate(id = row_number()) %>%
    pivot_longer(cols = c(anno_CITE_old, anno_new4),
                 names_to = "version", values_to = "celltype")

# New annotation first on the x axis; `levels` is written out in full instead
# of relying on partial argument matching of `level`.
df$version <- factor(df$version, levels = c("anno_new4", "anno_CITE_old"))

ggplot(df, aes(x = version, y = n,
               stratum = celltype, fill = celltype,
               alluvium = id)) +
    geom_stratum(alpha = .5) +
    geom_flow() +
    theme_minimal() +
    scale_fill_manual(values = c(pal24))

In [None]:
# Write the Treg labels into the SP object, matching cells by barcode so the
# result does not depend on cell order or on the Treg cells all being present
# in seurObj_SP.
cells_Treg_SP <- intersect(colnames(seurObj_SP), colnames(seurObj_Treg))
seurObj_SP@meta.data[cells_Treg_SP, "anno_new"] <-
    as.character(seurObj_Treg@meta.data[cells_Treg_SP, "anno_new4"])

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)

# SP UMAP with the updated labels; sample() shuffles the palette, so colours
# differ between runs.
DimPlot(
    seurObj_SP,
    reduction = "umap_wnn_SP",
    group.by = "anno_new",
    cols = sample(pal24)
)


In [None]:
# Write the Treg labels into the full object, matching cells by barcode rather
# than relying on both objects listing their cells in the same order.
cells_Treg <- intersect(colnames(seurObj), colnames(seurObj_Treg))
seurObj@meta.data[cells_Treg, "anno_new"] <-
    as.character(seurObj_Treg@meta.data[cells_Treg, "anno_new4"])

In [None]:
# Cell counts per label in the full object after the Treg transfer
table(seurObj$anno_new)

In [None]:
options(repr.plot.width = 12, repr.plot.height = 8)

# Full-dataset UMAP with the updated labels; the palette (plus one grey) is
# shuffled with sample(). TRUE/FALSE replace the reassignable T/F shorthands.
DimPlot(seurObj, reduction = "umap_wnn_mnn", group.by = "anno_new",
        cols = sample(c(pal24, "grey50")), shuffle = TRUE, raster = FALSE)

### B/DCs

These two cell types form a clearly distinct cluster. We can annotate DCs and B cells as a whole but due to the low coverage of these cell types we do not attempt to profile any subtypes.

In [None]:
options(repr.plot.width = 9, repr.plot.height = 8)

# Coarse clusters on the full UMAP, used to locate the B/DC cluster.
# TRUE/FALSE replace the reassignable T/F shorthands.
DimPlot(seurObj, reduction = "umap_wnn_mnn", group.by = "clusters_rough",
        cols = pal12, shuffle = TRUE, raster = FALSE)

In [None]:
options(repr.plot.width = 9, repr.plot.height = 8)

# CD19 vs CD4 within rough cluster 12, coloured by the previous annotation.
FeatureScatter(
    subset(seurObj, subset = clusters_rough == "12"),
    feature1 = "CD19",
    feature2 = "CD4",
    group.by = "anno_CITE_old"
)

In [None]:
# Make the coarse clustering the active identity; WhichCells(idents = ...) filters on it
Idents(seurObj) <- seurObj$clusters_rough

In [None]:
# Split rough cluster 12 by surface CD19: > 20 becomes "B", < 15 becomes "DC".
# NOTE(review): cells of cluster 12 with CD19 between 15 and 20 keep their
# previous label — presumably intentional, confirm.
seurObj$anno_new <- ifelse(
    colnames(seurObj) %in% WhichCells(seurObj, idents = "12", expression = CD19 > 20),
    yes = "B",
    no = seurObj$anno_new
)
seurObj$anno_new <- ifelse(
    colnames(seurObj) %in% WhichCells(seurObj, idents = "12", expression = CD19 < 15),
    yes = "DC",
    no = seurObj$anno_new
)


In [None]:
options(repr.plot.width = 12, repr.plot.height = 8)

# Full-dataset UMAP after adding the B and DC labels.
# TRUE/FALSE replace the reassignable T/F shorthands.
DimPlot(seurObj, reduction = "umap_wnn_mnn", group.by = "anno_new",
        cols = pal36, shuffle = TRUE, raster = FALSE)

### Innate(-like)

The final cluster contains NK(T) cells, gd T cells and CD8aa IELs.

In [None]:
# The innate(-like) compartment is taken as rough cluster 10 of the full object
seurObj_innate <- subset(seurObj, subset=clusters_rough %in% c('10'))

In [None]:
# Innate-specific ADT PCA: variable features, scaling and 50 PCs on 'ADTdsb'.
seurObj_innate <- FindVariableFeatures(seurObj_innate, assay = "ADTdsb")
seurObj_innate <- ScaleData(seurObj_innate, assay = "ADTdsb")
seurObj_innate <- RunPCA(seurObj_innate, assay = "ADTdsb", npcs = 50,
                         reduction.name = "pca_adt_innate")

In [None]:
# Batch-correct the ADT PCs across samples; stored as 'mnn_adt_innate'.
MNN <- reducedMNN(
    seurObj_innate[["pca_adt_innate"]]@cell.embeddings,
    batch = seurObj_innate$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_innate[["mnn_adt_innate"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# Innate-specific RNA PCA: variable features, scaling and 50 PCs on 'RNA'.
seurObj_innate <- FindVariableFeatures(seurObj_innate, assay = "RNA")
seurObj_innate <- ScaleData(seurObj_innate, assay = "RNA")
seurObj_innate <- RunPCA(seurObj_innate, assay = "RNA", npcs = 50,
                         reduction.name = "pca_rna_innate")

In [None]:
# Batch-correct the RNA PCs across samples; stored as 'mnn_rna_innate'.
MNN <- reducedMNN(
    seurObj_innate[["pca_rna_innate"]]@cell.embeddings,
    batch = seurObj_innate$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_innate[["mnn_rna_innate"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of PCs kept per modality (PCcutoff at a 0.1 threshold), then the
# weighted nearest-neighbour graph from both batch-corrected spaces.
dim_adt <- PCcutoff(seurObj_innate[["pca_adt_innate"]], diff = 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_innate[["pca_rna_innate"]], diff = 0.1)
dim_rna

seurObj_innate <- FindMultiModalNeighbors(
    seurObj_innate,
    reduction.list = list("mnn_adt_innate", "mnn_rna_innate"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
# UMAP computed from the weighted (ADT + RNA) neighbour object.
seurObj_innate <- RunUMAP(
    seurObj_innate,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_innate",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA on the RNA assay of the innate(-like) subset, guided by its
# own 'wsnn' graph. The call previously targeted seurObj_SP (copy-paste slip),
# which recomputed the SP object's sPCA and left seurObj_innate without a
# subset-specific 'spca' reduction for the clustering step that follows.
seurObj_innate <- RunSPCA(seurObj_innate, assay = 'RNA', graph = 'wsnn')

In [None]:
# Cluster the innate(-like) cells on the sPCA space (timed with proc.time()).
# NOTE(review): the single graph name 'wsnn' passed to FindNeighbors() is also
# the graph written by FindMultiModalNeighbors(); this presumably replaces the
# WNN graph before clustering — confirm that this is intended.
ptm <- proc.time()

seurObj_innate <- FindNeighbors(
    seurObj_innate,
    reduction = "spca",
    dims = seq_len(max(dim_adt, dim_rna)),
    assay = "ADTdsb",
    graph.name = "wsnn"
)
seurObj_innate <- FindClusters(
    seurObj_innate,
    graph.name = "wsnn",
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_innate"
)

proc.time() - ptm

In [None]:
# ADT-only UMAP on the batch-corrected ADT space.
seurObj_innate <- RunUMAP(
    seurObj_innate,
    reduction = "mnn_adt_innate",
    dims = seq_len(dim_adt),
    reduction.name = "umap_adt_innate",
    reduction.key = "adtUMAP_"
)

In [None]:
# ADT-only clustering on the batch-corrected ADT space (timed with proc.time()).
ptm <- proc.time()

seurObj_innate <- FindNeighbors(
    seurObj_innate,
    reduction = "mnn_adt_innate",
    dims = seq_len(dim_adt),
    assay = "ADTdsb"
)
seurObj_innate <- FindClusters(
    seurObj_innate,
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_innate_adt"
)

proc.time() - ptm

In [None]:
# WNN UMAP of the innate(-like) subset: both clusterings, then the previous
# annotation with a shuffled palette.
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj_innate, reduction = "umap_wnn_innate",
        group.by = "clusters_innate", cols = pal12)

DimPlot(seurObj_innate, reduction = "umap_wnn_innate",
        group.by = "clusters_innate_adt", cols = pal12)

options(repr.plot.width = 12, repr.plot.height = 7)

DimPlot(seurObj_innate, reduction = "umap_wnn_innate",
        group.by = "anno_CITE_old", cols = sample(pal36))


Remarkably, the separation of T/NK cells is much worse in the newly integrated UMAP. Nevertheless, there are some distinct markers that can be used to break apart the subset into distinct lineages.

In [None]:
# Previous annotation (rows) vs. sPCA-based clusters (columns)
table(seurObj_innate$anno_CITE_old, seurObj_innate$clusters_innate)

In [None]:
# Previous annotation (rows) vs. ADT-only clusters (columns)
table(seurObj_innate$anno_CITE_old, seurObj_innate$clusters_innate_adt)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)

# NK/T marker pairs per ADT cluster, coloured by the previous annotation; the
# palette is reshuffled with sample() for every plot.
invisible(lapply(
    list(
        c("CD56", "CD3"),
        c("CD56", "CD16"),
        c("CD161", "CD3")
    ),
    function(pair) {
        print(
            FeatureScatter(seurObj_innate, feature1 = pair[1], feature2 = pair[2],
                           group.by = "anno_CITE_old", cols = sample(pal36)) +
                facet_wrap(~seurObj_innate$clusters_innate_adt, ncol = 5) +
                theme_bw()
        )
    }
))

NK cells express CD56/CD16/CD161 but not CD3. All remaining cells are most likely CD8aa/gdT cells.

In [None]:
# NK cells: ADT clusters 1, 4, 6 and 10, CD3-low, and positive for at least one
# of CD56 / CD16 / CD161.
seurObj_NK <- subset(
    seurObj_innate,
    subset = clusters_innate_adt %in% c('1', '4', '6', '10') &
        CD3 < 8 &
        (CD56 > 35 | CD16 > 20 | CD161 > 15)
)
# Every other innate(-like) cell goes to the CD8aa/gdT object. TRUE replaces
# the reassignable shorthand T.
seurObj_aagd <- subset(seurObj_innate, cells = colnames(seurObj_NK), invert = TRUE)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# Highlight the selected NK cells on the subset UMAP and on the full UMAP,
# then the remaining CD8aa/gdT cells on the full UMAP.
DimPlot(seurObj_innate, reduction = "umap_wnn_innate",
        cells.highlight = colnames(seurObj_NK))

DimPlot(seurObj, reduction = "umap_wnn_mnn",
        cells.highlight = colnames(seurObj_NK))
DimPlot(seurObj, reduction = "umap_wnn_mnn",
        cells.highlight = colnames(seurObj_aagd))

While the selection in the integrated UMAP is quite scattered, it clearly identifies the NK protrusion in the full UMAP.

#### NK

In [None]:
# NK-specific ADT PCA: variable features, scaling and 50 PCs on 'ADTdsb'.
seurObj_NK <- FindVariableFeatures(seurObj_NK, assay = "ADTdsb")
seurObj_NK <- ScaleData(seurObj_NK, assay = "ADTdsb")
seurObj_NK <- RunPCA(seurObj_NK, assay = "ADTdsb", npcs = 50,
                     reduction.name = "pca_adt_NK")

In [None]:
# Batch-correct the ADT PCs across samples; stored as 'mnn_adt_NK'.
MNN <- reducedMNN(
    seurObj_NK[["pca_adt_NK"]]@cell.embeddings,
    batch = seurObj_NK$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_NK[["mnn_adt_NK"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# NK-specific RNA PCA: variable features, scaling and 50 PCs on 'RNA'.
seurObj_NK <- FindVariableFeatures(seurObj_NK, assay = "RNA")
seurObj_NK <- ScaleData(seurObj_NK, assay = "RNA")
seurObj_NK <- RunPCA(seurObj_NK, assay = "RNA", npcs = 50,
                     reduction.name = "pca_rna_NK")

In [None]:
# Batch-correct the RNA PCs across samples; stored as 'mnn_rna_NK'.
MNN <- reducedMNN(
    seurObj_NK[["pca_rna_NK"]]@cell.embeddings,
    batch = seurObj_NK$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_NK[["mnn_rna_NK"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of PCs kept per modality (PCcutoff at a 0.1 threshold), then the
# weighted nearest-neighbour graph from both batch-corrected spaces.
dim_adt <- PCcutoff(seurObj_NK[["pca_adt_NK"]], diff = 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_NK[["pca_rna_NK"]], diff = 0.1)
dim_rna

seurObj_NK <- FindMultiModalNeighbors(
    seurObj_NK,
    reduction.list = list("mnn_adt_NK", "mnn_rna_NK"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
# UMAP computed from the weighted (ADT + RNA) neighbour object.
seurObj_NK <- RunUMAP(
    seurObj_NK,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_NK",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA on the RNA assay guided by the 'wsnn' graph; the resulting 'spca' reduction is used for clustering below
seurObj_NK <- RunSPCA(seurObj_NK, assay='RNA', graph='wsnn')

In [None]:
# Cluster the NK cells on the sPCA space (timed with proc.time()).
# NOTE(review): the single graph name 'wsnn' passed to FindNeighbors() is also
# the graph written by FindMultiModalNeighbors(); this presumably replaces the
# WNN graph before clustering — confirm that this is intended.
ptm <- proc.time()

seurObj_NK <- FindNeighbors(
    seurObj_NK,
    reduction = "spca",
    dims = seq_len(max(dim_adt, dim_rna)),
    assay = "ADTdsb",
    graph.name = "wsnn"
)
seurObj_NK <- FindClusters(
    seurObj_NK,
    graph.name = "wsnn",
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_NK"
)

proc.time() - ptm

In [None]:
# ADT-only UMAP on the batch-corrected ADT space.
seurObj_NK <- RunUMAP(
    seurObj_NK,
    reduction = "mnn_adt_NK",
    dims = seq_len(dim_adt),
    reduction.name = "umap_adt_NK",
    reduction.key = "adtUMAP_"
)

In [None]:
# ADT-only clustering on the batch-corrected ADT space (timed with proc.time()).
ptm <- proc.time()

seurObj_NK <- FindNeighbors(
    seurObj_NK,
    reduction = "mnn_adt_NK",
    dims = seq_len(dim_adt),
    assay = "ADTdsb"
)
seurObj_NK <- FindClusters(
    seurObj_NK,
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_NK_adt"
)

proc.time() - ptm

In [None]:
# NK UMAPs: clusterings first, then the previous annotation on both embeddings.
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj_NK, reduction = "umap_wnn_NK",
        group.by = "clusters_NK", cols = pal12)

DimPlot(seurObj_NK, reduction = "umap_adt_NK",
        group.by = "clusters_NK_adt", cols = pal12)

options(repr.plot.width = 10, repr.plot.height = 7)

DimPlot(seurObj_NK, reduction = "umap_wnn_NK",
        group.by = "anno_CITE_old", cols = pal12)
DimPlot(seurObj_NK, reduction = "umap_adt_NK",
        group.by = "anno_CITE_old", cols = pal12)


In [None]:
options(repr.plot.width = 18, repr.plot.height = 4)

# T-cell surface markers per cluster, for both clusterings.
VlnPlot(
    seurObj_NK,
    features = c("CD3", "TCRab", "TCR-Vd2", "CD28", "CD4", "CD8"),
    group.by = "clusters_NK",
    cols = pal12,
    ncol = 6
)
VlnPlot(
    seurObj_NK,
    features = c("CD3", "TCRab", "TCR-Vd2", "CD28", "CD4", "CD8"),
    group.by = "clusters_NK_adt",
    cols = pal12,
    ncol = 6
)

In [None]:
# Previous annotation (rows) vs. ADT-only NK clusters (columns)
table(seurObj_NK$anno_CITE_old, seurObj_NK$clusters_NK_adt)

ADT cluster 4 contains the developing NKT cells (NKT_dev), cluster 5 the CD56lo CD16hi cells, cluster 3 the CD56hi CD16lo cells, and clusters 1 and 2 the tissue-resident NK cells.

In [None]:
# Surface (ADT) markers inspected across NK clusters
markers_NK <- c(
    "CD11b", "CD11c", "CD122", "CD158b", "CD16", "CD103", "CD161", "CD172a",
    "CD195", "CD224", "CD226", "CD244", "CD26", "CD27", "CD314", "CD328",
    "CD335", "CD352", "CD38", "CD39", "CD49a", "CD56", "CD62L", "CD71", "CD94",
    "CX3CR1", "GPR56", "Integrin-B7", "KLRG1", "TIGIT", "CD183", "CD69",
    "CD85j", "CD2", "CD57"
)

In [None]:
options(repr.plot.width = 18, repr.plot.height = 20)

# NK surface markers (alphabetical order) per ADT cluster.
VlnPlot(
    seurObj_NK,
    features = sort(markers_NK),
    group.by = "clusters_NK_adt",
    cols = pal12,
    ncol = 6
)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 4)

# Marker pairs used for the NK thresholds, one facet per ADT cluster.
invisible(lapply(
    list(
        c("CD56", "CD16"),
        c("Integrin-B7", "CD103"),
        c("CD49a", "CD103")
    ),
    function(pair) {
        print(
            FeatureScatter(seurObj_NK, feature1 = pair[1], feature2 = pair[2],
                           group.by = "anno_CITE_old", cols = pal12) +
                facet_wrap(~seurObj_NK$clusters_NK_adt, ncol = 5) +
                theme_bw()
        )
    }
))

In the previous marker-search analysis we had identified clear subsets of CD56hi16lo and CD56loCD16hi circulating NK cells as well as integrin hi/lo tissue resident NK cells and developing NKT cells. The new analysis reproduces these findings and allows us to annotate the cells accordingly.

In [None]:
# Annotate NK subsets from ADT cluster membership plus ADT thresholds.
# WhichCells(idents = ...) filters on the active identities, so they are set
# explicitly to the ADT clustering instead of relying on the side effect of the
# most recent FindClusters() call.
Idents(seurObj_NK) <- seurObj_NK$clusters_NK_adt

seurObj_NK$anno_new4 <- 'unknown'
# Tissue-resident NK cells (clusters 1 and 2), split by CD103 / Integrin-B7
seurObj_NK$anno_new4 <- ifelse(
    colnames(seurObj_NK) %in% WhichCells(seurObj_NK, expression = (CD103 < 30 | `Integrin-B7` < 15) & CD3 < 4, idents = c('1', '2')),
    "NK_tr_itg_lo", seurObj_NK$anno_new4
)
seurObj_NK$anno_new4 <- ifelse(
    colnames(seurObj_NK) %in% WhichCells(seurObj_NK, expression = CD103 > 30 & `Integrin-B7` > 15 & CD3 < 4, idents = c('1', '2')),
    "NK_tr_itg_hi", seurObj_NK$anno_new4
)
# Circulating NK cells (clusters 3 and 5), split by CD56 / CD16
seurObj_NK$anno_new4 <- ifelse(
    colnames(seurObj_NK) %in% WhichCells(seurObj_NK, expression = CD56 > 30 & CD16 < 40, idents = c('3')),
    "NK_circ_56hi16lo", seurObj_NK$anno_new4
)
seurObj_NK$anno_new4 <- ifelse(
    colnames(seurObj_NK) %in% WhichCells(seurObj_NK, expression = CD16 > 40, idents = c('5')),
    "NK_circ_56lo16hi", seurObj_NK$anno_new4
)
# Developing NKT cells (cluster 4)
seurObj_NK$anno_new4 <- ifelse(
    colnames(seurObj_NK) %in% WhichCells(seurObj_NK, idents = c('4')),
    "NKT_dev", seurObj_NK$anno_new4
)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 7)

# New NK labels on the ADT UMAP.
DimPlot(
    seurObj_NK,
    reduction = "umap_adt_NK",
    group.by = "anno_new4",
    cols = pal12
)

In [None]:
# Previous annotation (rows) vs. new NK labels (columns)
table(seurObj_NK$anno_CITE_old, seurObj_NK$anno_new4)

In [None]:
options(repr.plot.width = 12, repr.plot.height = 8)

# Alluvial comparison of old vs. new NK labels: count rows per barcode/label
# combination, give each row an id, and stack both annotation columns.
df <- seurObj_NK@meta.data %>%
    count(barcode, anno_CITE_old, anno_new4) %>%
    mutate(id = row_number()) %>%
    pivot_longer(
        cols = c(anno_CITE_old, anno_new4),
        names_to = "version",
        values_to = "celltype"
    )

ggplot(
    data = df,
    mapping = aes(x = version, y = n, stratum = celltype,
                  fill = celltype, alluvium = id)
) +
    geom_stratum(alpha = 0.5) +
    geom_flow() +
    theme_minimal() +
    scale_fill_manual(values = c(pal12, "grey30"))

In [None]:
# Write the NK labels into the full object, matching cells by barcode rather
# than relying on both objects listing their cells in the same order.
cells_NK <- intersect(colnames(seurObj), colnames(seurObj_NK))
seurObj@meta.data[cells_NK, "anno_new"] <-
    as.character(seurObj_NK@meta.data[cells_NK, "anno_new4"])

#### CD8aa/gdT

CD8aa and gdT cells are relatively similar at both the RNA and the surface marker level, and since the gdTCR antibody does not work reliably, it is difficult to separate the subsets based on surface marker thresholds alone.

In [None]:
# CD8aa/gdT-specific ADT PCA: variable features, scaling and 50 PCs on 'ADTdsb'.
seurObj_aagd <- FindVariableFeatures(seurObj_aagd, assay = "ADTdsb")
seurObj_aagd <- ScaleData(seurObj_aagd, assay = "ADTdsb")
seurObj_aagd <- RunPCA(seurObj_aagd, assay = "ADTdsb", npcs = 50,
                       reduction.name = "pca_adt_aagd")

In [None]:
# Batch-correct the ADT PCs across samples; stored as 'mnn_adt_aagd'.
MNN <- reducedMNN(
    seurObj_aagd[["pca_adt_aagd"]]@cell.embeddings,
    batch = seurObj_aagd$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_aagd[["mnn_adt_aagd"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# CD8aa/gdT-specific RNA PCA: variable features, scaling and 50 PCs on 'RNA'.
seurObj_aagd <- FindVariableFeatures(seurObj_aagd, assay = "RNA")
seurObj_aagd <- ScaleData(seurObj_aagd, assay = "RNA")
seurObj_aagd <- RunPCA(seurObj_aagd, assay = "RNA", npcs = 50,
                       reduction.name = "pca_rna_aagd")

In [None]:
# Batch-correct the RNA PCs across samples; stored as 'mnn_rna_aagd'.
MNN <- reducedMNN(
    seurObj_aagd[["pca_rna_aagd"]]@cell.embeddings,
    batch = seurObj_aagd$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_aagd[["mnn_rna_aagd"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of PCs kept per modality (PCcutoff at a 0.1 threshold), then the
# weighted nearest-neighbour graph from both batch-corrected spaces.
dim_adt <- PCcutoff(seurObj_aagd[["pca_adt_aagd"]], diff = 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_aagd[["pca_rna_aagd"]], diff = 0.1)
dim_rna

seurObj_aagd <- FindMultiModalNeighbors(
    seurObj_aagd,
    reduction.list = list("mnn_adt_aagd", "mnn_rna_aagd"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
# UMAP computed from the weighted (ADT + RNA) neighbour object.
seurObj_aagd <- RunUMAP(
    seurObj_aagd,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_aagd",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA on the RNA assay guided by the 'wsnn' graph; the resulting 'spca' reduction is used for clustering below
seurObj_aagd <- RunSPCA(seurObj_aagd, assay='RNA', graph='wsnn')

In [None]:
# Cluster the CD8aa/gdT cells on the sPCA space (timed with proc.time()).
# NOTE(review): the single graph name 'wsnn' passed to FindNeighbors() is also
# the graph written by FindMultiModalNeighbors(); this presumably replaces the
# WNN graph before clustering — confirm that this is intended.
ptm <- proc.time()

seurObj_aagd <- FindNeighbors(
    seurObj_aagd,
    reduction = "spca",
    dims = seq_len(max(dim_adt, dim_rna)),
    assay = "ADTdsb",
    graph.name = "wsnn"
)
seurObj_aagd <- FindClusters(
    seurObj_aagd,
    graph.name = "wsnn",
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_aagd"
)

proc.time() - ptm

In [None]:
# ADT-only UMAP on the batch-corrected ADT space.
seurObj_aagd <- RunUMAP(
    seurObj_aagd,
    reduction = "mnn_adt_aagd",
    dims = seq_len(dim_adt),
    reduction.name = "umap_adt_aagd",
    reduction.key = "adtUMAP_"
)

In [None]:
# ADT-only clustering on the batch-corrected ADT space (timed with proc.time()).
ptm <- proc.time()

seurObj_aagd <- FindNeighbors(
    seurObj_aagd,
    reduction = "mnn_adt_aagd",
    dims = seq_len(dim_adt),
    assay = "ADTdsb"
)
seurObj_aagd <- FindClusters(
    seurObj_aagd,
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_aagd_adt"
)

proc.time() - ptm

In [None]:
# CD8aa/gdT UMAPs: clusterings first, then the previous annotation.
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj_aagd, reduction = "umap_wnn_aagd",
        group.by = "clusters_aagd", cols = pal24)

DimPlot(seurObj_aagd, reduction = "umap_adt_aagd",
        group.by = "clusters_aagd_adt", cols = pal24)

options(repr.plot.width = 12, repr.plot.height = 7)

DimPlot(seurObj_aagd, reduction = "umap_wnn_aagd",
        group.by = "anno_CITE_old", cols = pal36)
DimPlot(seurObj_aagd, reduction = "umap_adt_aagd",
        group.by = "anno_CITE_old", cols = pal36)


In [None]:
# Previous annotation (rows) vs. sPCA-based CD8aa/gdT clusters (columns)
table(seurObj_aagd$anno_CITE_old, seurObj_aagd$clusters_aagd)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 20)

# Surface markers per cluster
VlnPlot(
    seurObj_aagd,
    features = c(
        "TCRgd", "TCR-Vd2", "CD62L", "CD21", "CD226", "CD146", "CD3", "CD224",
        "CD1a", "CD1c", "CD73", "CD244", "CD54", "CD44", "CD31", "CD122",
        "CD56", "Integrin-B7", "CD27", "CD10", "CD49f", "CD11c", "CD103",
        "CD172a", "CD4", "CD8", "CD34", "CD24", "TCRab"
    ),
    group.by = "clusters_aagd",
    cols = pal24,
    ncol = 5,
    pt.size = 0.001
)

options(repr.plot.width = 16, repr.plot.height = 8)

# RNA features per cluster
VlnPlot(
    seurObj_aagd,
    features = c("KLF2", "S1PR1", "TRDC", "TRBC2", "TRAC", "TRGC1", "TRGC2",
                 "RAG2", "CD8A", "CD8B"),
    assay = "RNA",
    group.by = "clusters_aagd",
    cols = pal24,
    ncol = 5,
    pt.size = 0.001
)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 10)

# TCR / CD8 marker pairs per cluster, coloured by the previous annotation.
invisible(lapply(
    list(
        c("TCRab", "CD3"),
        c("TCRgd", "CD8"),
        c("TCR-Vd2", "CD8")
    ),
    function(pair) {
        print(
            FeatureScatter(seurObj_aagd, feature1 = pair[1], feature2 = pair[2],
                           group.by = "anno_CITE_old", cols = pal36) +
                facet_wrap(~seurObj_aagd$clusters_aagd, ncol = 5) +
                theme_bw()
        )
    }
))

Based on the expression of CD8 and TCR we can identify clusters associated with CD8aas, gdT cells, and a Vd2/CD8memory mixed cluster.

In [None]:
# gdT cells: clusters 6, 9 and 10 with low surface TCRab.
seurObj_gdT <- subset(
    seurObj_aagd,
    subset = clusters_aagd %in% c("6", "9", "10") & TCRab < 7
)
# CD8aa cells: the listed clusters with low surface TCRgd and CD8 > 5.
# Cluster 7 is assigned to neither object.
seurObj_8aa <- subset(
    seurObj_aagd,
    subset = clusters_aagd %in% c("1", "2", "3", "4", "5", "8", "11", "12", "13") &
        TCRgd < 10 & CD8 > 5
)
                      

In [None]:
# gdT-specific ADT PCA: variable features, scaling and 50 PCs on 'ADTdsb'.
seurObj_gdT <- FindVariableFeatures(seurObj_gdT, assay = "ADTdsb")
seurObj_gdT <- ScaleData(seurObj_gdT, assay = "ADTdsb")
seurObj_gdT <- RunPCA(seurObj_gdT, assay = "ADTdsb", npcs = 50,
                      reduction.name = "pca_adt_gdT")

In [None]:
# Batch-correct the ADT PCs across samples; stored as 'mnn_adt_gdT'.
MNN <- reducedMNN(
    seurObj_gdT[["pca_adt_gdT"]]@cell.embeddings,
    batch = seurObj_gdT$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_gdT[["mnn_adt_gdT"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# gdT-specific RNA PCA: variable features, scaling and 50 PCs on 'RNA'.
seurObj_gdT <- FindVariableFeatures(seurObj_gdT, assay = "RNA")
seurObj_gdT <- ScaleData(seurObj_gdT, assay = "RNA")
seurObj_gdT <- RunPCA(seurObj_gdT, assay = "RNA", npcs = 50,
                      reduction.name = "pca_rna_gdT")

In [None]:
# Batch-correct the RNA PCs across samples; stored as 'mnn_rna_gdT'.
MNN <- reducedMNN(
    seurObj_gdT[["pca_rna_gdT"]]@cell.embeddings,
    batch = seurObj_gdT$sample,
    BPPARAM = MulticoreParam(workers = 12), #parallelisation
    BNPARAM = HnswParam()
)
seurObj_gdT[["mnn_rna_gdT"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of PCs kept per modality (PCcutoff at a 0.1 threshold), then the
# weighted nearest-neighbour graph from both batch-corrected spaces.
dim_adt <- PCcutoff(seurObj_gdT[["pca_adt_gdT"]], diff = 0.1)
dim_adt

dim_rna <- PCcutoff(seurObj_gdT[["pca_rna_gdT"]], diff = 0.1)
dim_rna

seurObj_gdT <- FindMultiModalNeighbors(
    seurObj_gdT,
    reduction.list = list("mnn_adt_gdT", "mnn_rna_gdT"),
    dims.list = list(seq_len(dim_adt), seq_len(dim_rna))
)

In [None]:
# UMAP computed from the weighted (ADT + RNA) neighbour object.
seurObj_gdT <- RunUMAP(
    seurObj_gdT,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_gdT",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA on the RNA assay guided by the 'wsnn' graph; the resulting 'spca' reduction is used for clustering below
seurObj_gdT <- RunSPCA(seurObj_gdT, assay='RNA', graph='wsnn')

In [None]:
# Cluster the gdT cells on the sPCA space (timed with proc.time()).
# NOTE(review): the single graph name 'wsnn' passed to FindNeighbors() is also
# the graph written by FindMultiModalNeighbors(); this presumably replaces the
# WNN graph before clustering — confirm that this is intended.
ptm <- proc.time()

seurObj_gdT <- FindNeighbors(
    seurObj_gdT,
    reduction = "spca",
    dims = seq_len(max(dim_adt, dim_rna)),
    assay = "ADTdsb",
    graph.name = "wsnn"
)
seurObj_gdT <- FindClusters(
    seurObj_gdT,
    graph.name = "wsnn",
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_gdT"
)

proc.time() - ptm

In [None]:
# ADT-only UMAP on the batch-corrected ADT space.
seurObj_gdT <- RunUMAP(
    seurObj_gdT,
    reduction = "mnn_adt_gdT",
    dims = seq_len(dim_adt),
    reduction.name = "umap_adt_gdT",
    reduction.key = "adtUMAP_"
)

In [None]:
# ADT-only clustering at resolution 1.5 (timed with proc.time()).
ptm <- proc.time()

seurObj_gdT <- FindNeighbors(
    seurObj_gdT,
    reduction = "mnn_adt_gdT",
    dims = seq_len(dim_adt),
    assay = "ADTdsb"
)
seurObj_gdT <- FindClusters(
    seurObj_gdT,
    algorithm = 4,
    resolution = 1.5,
    random.seed = 123,
    method = "igraph",
    cluster.name = "clusters_gdT_adt"
)

proc.time() - ptm

In [None]:
# gdT UMAPs: clusterings first, then the previous annotation.
options(repr.plot.width = 8, repr.plot.height = 7)

DimPlot(seurObj_gdT, reduction = "umap_wnn_gdT",
        group.by = "clusters_gdT", cols = pal12)

DimPlot(seurObj_gdT, reduction = "umap_adt_gdT",
        group.by = "clusters_gdT_adt", cols = pal12)

options(repr.plot.width = 10, repr.plot.height = 7)

DimPlot(seurObj_gdT, reduction = "umap_wnn_gdT",
        group.by = "anno_CITE_old", cols = pal24)
DimPlot(seurObj_gdT, reduction = "umap_adt_gdT",
        group.by = "anno_CITE_old", cols = pal24)


In [None]:
options(repr.plot.width = 16, repr.plot.height = 8)

# CD1a vs CD27, one facet per gdT ADT cluster, coloured by the previous
# annotation.
FeatureScatter(
    seurObj_gdT,
    feature1 = "CD1a",
    feature2 = "CD27",
    group.by = "anno_CITE_old",
    cols = pal24
) +
    facet_wrap(~seurObj_gdT$clusters_gdT_adt, ncol = 4) +
    theme_bw()

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Surface markers plus a few RNA features ("rna_" prefix) inspected across the
# gdT ADT clusters.
markers_gd <- c(
    "CD1a", "CD45RA", "CD45RO", "CD73", "CD3", "CD7", "CD44", "CD24", "CD27",
    "rna_GNG4", "rna_ZNF683", "CD279", "CD10", "CD137", "CD62L", "TCRgd",
    "CD21", "CD226", "CD146", "CD224", "CD1c", "CD244", "CD54", "CD31",
    "CD122", "CD56", "CD194", "CD94", "CD278", "CD5", "CD314", "CD26", "CD161",
    "CD196", "rna_TRGC1", "rna_TRGC2", "rna_NCR3"
)
VlnPlot(
    seurObj_gdT,
    features = sort(markers_gd),
    group.by = "clusters_gdT_adt",
    cols = pal12,
    ncol = 5,
    pt.size = 0.001
)

In [None]:
# Previous annotation (rows) vs. the clusters inherited from the combined CD8aa/gdT analysis (columns)
table(seurObj_gdT$anno_CITE_old, seurObj_gdT$clusters_aagd)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 4)

# CD1a vs CD27 again, now faceted by the clusters inherited from the combined
# CD8aa/gdT analysis.
FeatureScatter(
    seurObj_gdT,
    feature1 = "CD1a",
    feature2 = "CD27",
    group.by = "anno_CITE_old",
    cols = pal24
) +
    facet_wrap(~seurObj_gdT$clusters_aagd, ncol = 4) +
    theme_bw()

Despite testing different resolutions, the gdT clustering does not yield distinct immature/mature clusters. We attempt an approximation based on CD1a levels, although this does not reproduce initial annotations very well. Marker analysis of maturing gdT cells would be needed especially to compare CD1a/CD62L/CD27/CD44 along the maturation trajectory since CD45RA cannot serve as a marker in this subset.

In [None]:
# Approximate gdT maturation stages from the combined CD8aa/gdT clusters plus a
# CD1a threshold. Cells of clusters 10 and 9 that fail their CD1a threshold
# stay 'unknown'.
Idents(seurObj_gdT) <- seurObj_gdT$clusters_aagd
seurObj_gdT$anno_new4 <- "unknown"
seurObj_gdT$anno_new4 <- ifelse(
    colnames(seurObj_gdT) %in% WhichCells(seurObj_gdT, idents = "10", expression = CD1a > 5),
    yes = "gdT_immature",
    no = seurObj_gdT$anno_new4
)
seurObj_gdT$anno_new4 <- ifelse(
    colnames(seurObj_gdT) %in% WhichCells(seurObj_gdT, idents = "6"),
    yes = "gdT_semimature",
    no = seurObj_gdT$anno_new4
)
seurObj_gdT$anno_new4 <- ifelse(
    colnames(seurObj_gdT) %in% WhichCells(seurObj_gdT, idents = "9", expression = CD1a < 5),
    yes = "gdT_mature",
    no = seurObj_gdT$anno_new4
)

In [None]:
# Copy the gdT labels into the full object, aligned by cell barcode.
# match() returns, for every gdT cell, its column position in seurObj, so the
# transfer does not rely on both objects listing their cells in the same order
# (a logical mask on the left-hand side would silently assume that).
idx_gdT <- match(colnames(seurObj_gdT), colnames(seurObj))
stopifnot(!anyNA(idx_gdT)) # every gdT cell must be present in the full object
seurObj$anno_new[idx_gdT] <- seurObj_gdT$anno_new4

#### CD8aa

In [None]:
# ADT modality of the CD8aa subset: variable features -> scaling -> PCA (50 PCs),
# stored as reduction 'pca_adt_8aa'
seurObj_8aa <- FindVariableFeatures(seurObj_8aa, assay = 'ADTdsb')
seurObj_8aa <- ScaleData(seurObj_8aa, assay = 'ADTdsb')
seurObj_8aa <- RunPCA(
    seurObj_8aa,
    assay = 'ADTdsb',
    npcs = 50,
    reduction.name = 'pca_adt_8aa'
)

In [None]:
# Batch-correct the ADT PCA embedding across samples with reducedMNN
MNN <- reducedMNN(
    Embeddings(seurObj_8aa, reduction = 'pca_adt_8aa'),
    batch = seurObj_8aa$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)

# Store the corrected coordinates as reduction 'mnn_adt_8aa'
# NOTE(review): the RNA MNN reduction of this subset is created with the same
# key ("mnn_") - confirm how Seurat resolves keys shared by two reductions.
seurObj_8aa[["mnn_adt_8aa"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "ADTdsb",
    key = "mnn_"
)

In [None]:
# RNA modality of the CD8aa subset: variable features -> scaling -> PCA (50 PCs),
# stored as reduction 'pca_rna_8aa'
seurObj_8aa <- FindVariableFeatures(seurObj_8aa, assay = 'RNA')
seurObj_8aa <- ScaleData(seurObj_8aa, assay = 'RNA')
seurObj_8aa <- RunPCA(
    seurObj_8aa,
    assay = 'RNA',
    npcs = 50,
    reduction.name = 'pca_rna_8aa'
)

In [None]:
# Batch-correct the RNA PCA embedding across samples with reducedMNN
MNN <- reducedMNN(
    Embeddings(seurObj_8aa, reduction = 'pca_rna_8aa'),
    batch = seurObj_8aa$sample,
    BPPARAM = MulticoreParam(workers = 12), # parallelisation
    BNPARAM = HnswParam()
)

# Store the corrected coordinates as reduction 'mnn_rna_8aa'
seurObj_8aa[["mnn_rna_8aa"]] <- CreateDimReducObject(
    embeddings = MNN$corrected,
    assay = "RNA",
    key = "mnn_"
)

In [None]:
# Number of PCs to keep per modality, using the PCcutoff helper with a 0.1 cutoff;
# the bare names display the chosen values in the notebook
dim_adt <- PCcutoff(pca = seurObj_8aa@reductions$pca_adt_8aa, diff = 0.1)
dim_adt

dim_rna <- PCcutoff(pca = seurObj_8aa@reductions$pca_rna_8aa, diff = 0.1)
dim_rna

# Weighted nearest-neighbour graph built from both MNN-corrected embeddings,
# each restricted to its own number of dimensions
seurObj_8aa <- FindMultiModalNeighbors(
    seurObj_8aa,
    reduction.list = list('mnn_adt_8aa', 'mnn_rna_8aa'),
    dims.list = list(1:dim_adt, 1:dim_rna)
)

In [None]:
# UMAP computed on the weighted nearest-neighbour object, stored as 'umap_wnn_8aa'
seurObj_8aa <- RunUMAP(
    seurObj_8aa,
    nn.name = "weighted.nn",
    reduction.name = "umap_wnn_8aa",
    reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA of the RNA assay guided by the 'wsnn' graph
seurObj_8aa <- RunSPCA(
    seurObj_8aa,
    assay = 'RNA',
    graph = 'wsnn'
)

In [None]:
# Time the neighbour search + clustering
ptm <- proc.time()

# NOTE(review): graph.name = 'wsnn' is also the graph name passed to RunSPCA
# above - confirm that writing the sPCA-based neighbour graph under the same
# name is intended.
seurObj_8aa <- FindNeighbors(
    seurObj_8aa,
    reduction = 'spca',
    dims = 1:max(c(dim_adt, dim_rna)),
    assay = 'ADTdsb',
    graph.name = 'wsnn'
)

# Cluster on the 'wsnn' graph (algorithm 4, resolution 1); labels are stored
# in the 'clusters_8aa' metadata column
seurObj_8aa <- FindClusters(
    seurObj_8aa,
    graph.name = 'wsnn',
    algorithm = 4,
    resolution = 1,
    random.seed = 123,
    method = 'igraph',
    cluster.name = 'clusters_8aa'
)

proc.time() - ptm

In [None]:
# ADT-only UMAP on the MNN-corrected ADT embedding, stored as 'umap_adt_8aa'
seurObj_8aa <- RunUMAP(
    seurObj_8aa,
    reduction = 'mnn_adt_8aa',
    dims = 1:dim_adt,
    reduction.name = "umap_adt_8aa",
    reduction.key = "adtUMAP_"
)

In [None]:
# Time the ADT-only neighbour search + clustering
ptm <- proc.time()

# Neighbour graph on the MNN-corrected ADT embedding
seurObj_8aa <- FindNeighbors(
    seurObj_8aa,
    reduction = 'mnn_adt_8aa',
    dims = 1:dim_adt,
    assay = 'ADTdsb'
)

# No graph.name is given, so FindClusters falls back to its default graph;
# labels are stored in the 'clusters_8aa_adt' metadata column
seurObj_8aa <- FindClusters(
    seurObj_8aa,
    algorithm = 4,
    resolution = 1.5,
    random.seed = 123,
    method = 'igraph',
    cluster.name = 'clusters_8aa_adt'
)

proc.time() - ptm

In [None]:
options(repr.plot.width=8, repr.plot.height=7)

# New clusterings on their respective embeddings (WNN and ADT-only)
DimPlot(seurObj_8aa, reduction = 'umap_wnn_8aa', group.by = 'clusters_8aa', cols=pal12)

DimPlot(seurObj_8aa, reduction = 'umap_adt_8aa', group.by = 'clusters_8aa_adt', cols=pal12)

options(repr.plot.width=10, repr.plot.height=7)

# Old annotation on both embeddings. The palette is shuffled once and reused,
# so a given label has the same colour in both plots and they can be compared
# directly (two separate sample() calls would colour them differently).
pal36_shuffled <- sample(pal36)
DimPlot(seurObj_8aa, reduction = 'umap_wnn_8aa', group.by = 'anno_CITE_old', cols=pal36_shuffled)
DimPlot(seurObj_8aa, reduction = 'umap_adt_8aa', group.by = 'anno_CITE_old', cols=pal36_shuffled)


In [None]:
# Contingency table: previous CITE-seq labels (rows) vs. ADT-based CD8aa clusters (columns)
table(
    seurObj_8aa$anno_CITE_old,
    seurObj_8aa$clusters_8aa_adt
)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Violin panels for the markers_gd panel (alphabetical order), grouped by
# ADT-based CD8aa cluster; individual points are hidden (pt.size = 0)
VlnPlot(
    seurObj_8aa,
    features = sort(markers_gd),
    group.by = 'clusters_8aa_adt',
    pt.size = 0,
    ncol = 5,
    cols = pal12
)

CD8aaI and II separate quite clearly. Within these, immature and mature clusters can be identified.

In [None]:
# Cell selection below is driven by the ADT-based CD8aa cluster identities
Idents(seurObj_8aa) <- seurObj_8aa$clusters_8aa_adt

# Label -> clusters lookup; applied in the order listed
cluster_labels_8aa <- list(
    CD8aaI_immature  = c('2', '4', '11'),
    CD8aaI_mature    = c('3', '10', '1'),
    CD8aaII_immature = c('5', '7', '8'),
    CD8aaII_mature   = c('6', '12', '9')
)

# Every cell starts as 'unknown' and is relabelled if its cluster is listed above
seurObj_8aa$anno_new4 <- 'unknown'
for (label_8aa in names(cluster_labels_8aa)) {
    cells_8aa <- WhichCells(seurObj_8aa, idents = cluster_labels_8aa[[label_8aa]])
    seurObj_8aa$anno_new4[colnames(seurObj_8aa) %in% cells_8aa] <- label_8aa
}

In [None]:
# Copy the CD8aa labels into the full object, aligned by cell barcode.
# match() returns, for every CD8aa cell, its column position in seurObj, so the
# transfer does not rely on both objects listing their cells in the same order
# (a logical mask on the left-hand side would silently assume that).
idx_8aa <- match(colnames(seurObj_8aa), colnames(seurObj))
stopifnot(!anyNA(idx_8aa)) # every CD8aa cell must be present in the full object
seurObj$anno_new[idx_8aa] <- seurObj_8aa$anno_new4

In [None]:
# Use the aagd clustering as active identity for the cluster-based selections below
Idents(object = seurObj_aagd) <- seurObj_aagd$clusters_aagd

In [None]:
# aagd cluster 7, TCR-Vd2 above 20 -> 'gdT_Vd2'
vd2_cells <- WhichCells(
    seurObj_aagd,
    idents = '7',
    expression = `TCR-Vd2` > 20
)
seurObj$anno_new[colnames(seurObj) %in% vd2_cells] <- 'gdT_Vd2'

In [None]:
# aagd cluster 7, TCR-Vd2 below 20 and CD8 above 5 -> 'SP_CD8_memory'
cd8_memory_cells <- WhichCells(
    seurObj_aagd,
    idents = '7',
    expression = `TCR-Vd2` < 20 & CD8 > 5
)
seurObj$anno_new[colnames(seurObj) %in% cd8_memory_cells] <- 'SP_CD8_memory'

In [None]:
# Collapse the 'unknown_DN' and 'unknown_techn' labels into plain 'unknown'
# (inner call runs first, then the outer one - same order as two separate steps)
seurObj$anno_new <- gsub(
    'unknown_techn', 'unknown',
    gsub('unknown_DN', 'unknown', seurObj$anno_new)
)

With this, the annotation is complete. Some annotations have changed, but the majority of cells have robust labels.

In [None]:
options(repr.plot.width = 12, repr.plot.height = 8)

# One row per (barcode, old label, new label) combination with its count n,
# then reshaped to long format: 'version' says which annotation the row
# belongs to, 'celltype' holds the label; 'id' ties the two rows of a
# combination together
df <- seurObj@meta.data %>%
    dplyr::count(barcode, anno_CITE_old, anno_new) %>%
    mutate(id = row_number()) %>%
    pivot_longer(
        cols = c(anno_CITE_old, anno_new),
        names_to = 'version',
        values_to = 'celltype'
    )

# Alluvial plot: strata are the labels within each annotation version,
# flows connect the old label of a combination to its new label
ggplot(
    df,
    aes(
        x = version,
        y = n,
        stratum = celltype,
        alluvium = id,
        fill = celltype
    )
) +
    geom_stratum(alpha = 0.5) +
    geom_flow() +
    theme_minimal() +
    scale_fill_manual(values = pal60)

In [None]:
# All labels present in the new annotation, named by their running index so
# that the index can be used to define a custom order afterwards
celltypes2 <- names(table(seurObj$anno_new))
celltypes2 <- setNames(celltypes2, seq_along(celltypes2))
celltypes2

In [None]:
# Hand-curated display order, expressed as positions in celltypes2
# (a permutation of 1:39)
celltype_order_idx <- c(
    38, 6:9, 12, 14:16, 13, 11,
    26, 28, 27, 29, 32, 30, 31,
    34, 35, 36, 33, 37, 2:5,
    17, 19, 18, 20, 25, 23, 24,
    21, 22, 1, 10, 39
)
celltypes_ordered2 <- celltypes2[celltype_order_idx]
celltypes_ordered2

In [None]:
# Replace the old index names with the position in the new order
celltypes_ordered2 <- setNames(celltypes_ordered2, seq_along(celltypes_ordered2))

In [None]:
# The level order comes from a hard-coded index permutation. If the set of
# labels has changed since it was written, factor() would silently turn the
# affected cells into NA, so fail early with an explicit message instead.
unlisted_labels <- setdiff(unique(seurObj$anno_new), c(celltypes_ordered2, NA))
if (anyNA(celltypes_ordered2) || length(unlisted_labels) > 0) {
    stop(
        "celltypes_ordered2 does not match the labels in anno_new; not covered: ",
        paste(unlisted_labels, collapse = ", "),
        call. = FALSE
    )
}

# Final annotation as an ordered-level factor; the working column is dropped
seurObj$anno_CITE_new <- factor(seurObj$anno_new, levels=celltypes_ordered2)
seurObj$anno_new <- NULL

In [None]:
options(repr.plot.width=12, repr.plot.height=8)

# New annotation on the full WNN UMAP; cells are drawn in shuffled order and
# without rasterisation. TRUE/FALSE are spelled out because T and F are
# ordinary variables that can be reassigned.
DimPlot(seurObj, reduction = 'umap_wnn_mnn', group.by = 'anno_CITE_new', cols=pal40, shuffle=TRUE, raster=FALSE)

In [None]:
options(repr.plot.width=12, repr.plot.height=8)

# Old annotation on the full WNN UMAP, for comparison with the new labels.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can
# be reassigned.
DimPlot(seurObj, reduction = 'umap_wnn_mnn', group.by = 'anno_CITE_old', cols=pal40, shuffle=TRUE, raster=FALSE)

In [None]:
# Export the per-cell metadata (including the new annotation) as CSV
write.csv(
    x = seurObj@meta.data,
    file = './HTSA_CITE_anno.csv'
)

In [None]:
# Save the annotated object; an existing file at this path is overwritten
SaveH5Seurat(
    seurObj,
    filename = './HTSA_CITE_anno.h5seurat',
    overwrite = TRUE
)

In [None]:
#seurObj <- LoadH5Seurat('./HTSA_CITE_anno.h5seurat')

In [None]:
#Convert("HTSA_CITE_anno_test.h5seurat", dest = "h5ad", assay = 'ADTdsb', overwrite = TRUE)
#CAVE: exports scale.data slot, no way to change it

In [None]:
# Export the 'data' slot of the ADTdsb assay as CSV
write.csv(
    x = seurObj@assays$ADTdsb@data,
    file = 'ADTdsb.csv'
)