# CITEseq data analysis

*Author: Lena Boehme, Taghon lab, 2023*

## Pseudotime analysis of the CD4 vs. CD8 lineage

We carry out trajectory analysis on the abT cell lineages using slingshot. This allows us to track gene/protein expression patterns over time.

### Setup

In [None]:
# Set the working directory so that the relative object path used when loading
# the h5Seurat file resolves.
# NOTE(review): machine-specific absolute path - adjust before running elsewhere.
setwd("/home/lenab/Documents/scSeq_analyses/B_TotalThymus_CITEseq/2022_TotalThymus_CITEseq_HTA/objects")

In [None]:
# Notebook-wide defaults: plot size used by the repr plotting backend and a
# large scipen penalty so that numbers are printed without scientific notation.
options(
  repr.plot.width = 12,
  repr.plot.height = 6,
  scipen = 100
)

In [None]:
library(SeuratDisk)
library(Seurat)
library(matrixStats)
library(ggplot2)
library(pheatmap)
library(reshape2)
library(dplyr)
library(tidyr)
library(viridis)
library(RColorBrewer)
library(stringr)
library(batchelor)
library(BiocParallel)
library(BiocNeighbors)
library(slingshot)
library(tradeSeq)
library(SingleCellExperiment)
library(scater)
library(destiny)
library(pals)

In [None]:
# Print the R version and the attached/loaded packages for reproducibility.
sessionInfo()

In [None]:
# Qualitative colour palettes derived from the "Paired" scheme:
# 8 and 16 colours via brewer.paired(), and 38 colours interpolated from the
# 12 base colours of the ColorBrewer palette.
pal8 <- brewer.paired(8)
pal16 <- brewer.paired(16)
pal38 <- colorRampPalette(brewer.pal(n = 12, name = "Paired"))(38)

In [None]:
# Load the annotated CITE-seq Seurat object from its h5Seurat file
# (path is relative to the working directory).
seurObj_CITE <- LoadH5Seurat('./HTA2_v17_annonew2.h5seurat')

#### Trajectory analysis on post-positive selection T cells

Conventional ab T cells as well as Tregs and CD8aa IELs develop from DP thymocytes, so cells from all of these lineages are included in the analysis (in contrast to gdT cells, which arise earlier in thymocyte development).

In [None]:
# Number of cells per label in the anno_CITE_4v3 annotation.
table(seurObj_CITE$anno_CITE_4v3)

In [None]:
# Keep only the 16 post-positive-selection populations (DP_pos_sel/DP_4hi8lo,
# CD4/CD8 SP, Treg and CD8aa subsets) for the trajectory analysis.
# NOTE(review): the subset is made on anno_CITE_4v3, while the plots and
# slingshot calls below use anno_CITE_4v2 - confirm both columns carry the same labels.
seurObj_CITE_SP <- subset(seurObj_CITE, subset=anno_CITE_4v3 %in% c('DP_4hi8lo', 'DP_pos_sel','SP_CD4_immature', 'SP_CD4_mature', 'SP_CD4_semimature', 'SP_CD8_immature', 'SP_CD8_mature', 'SP_CD8_semimature', 'SP_Treg_CD8', 'SP_Treg_immature', 'SP_Treg_mature', 'SP_Treg_PD1', 'CD8aaI_immature', 'CD8aaI_mature', 'CD8aaII_immature', 'CD8aaII_mature'))

In [None]:
seurObj_CITE_SP

In [None]:
# Protein (ADTdsb) modality of the subset: variable features, scaling and a
# 50-component PCA stored as reduction "pca_adt_SP".
seurObj_CITE_SP <- FindVariableFeatures(seurObj_CITE_SP, assay = 'ADTdsb')
seurObj_CITE_SP <- ScaleData(seurObj_CITE_SP, assay = 'ADTdsb')
seurObj_CITE_SP <- RunPCA(
  seurObj_CITE_SP,
  assay = 'ADTdsb',
  npcs = 50,
  reduction.name = 'pca_adt_SP'
)

In [None]:
# Correct the ADT PCA embedding across samples with reducedMNN() and store the
# corrected coordinates as reduction "mnn_adt_SP".
MNN <- reducedMNN(
  seurObj_CITE_SP[["pca_adt_SP"]]@cell.embeddings,
  batch = seurObj_CITE_SP$sample,
  BPPARAM = MulticoreParam(workers = 12), # parallelisation
  BNPARAM = HnswParam()
)
seurObj_CITE_SP[["mnn_adt_SP"]] <- CreateDimReducObject(
  embeddings = MNN$corrected,
  assay = "ADTdsb",
  key = "mnn_"
)

In [None]:
# RNA modality of the subset: variable features, scaling and a 50-component
# PCA stored as reduction "pca_rna_SP".
seurObj_CITE_SP <- FindVariableFeatures(seurObj_CITE_SP, assay = 'RNA')
seurObj_CITE_SP <- ScaleData(seurObj_CITE_SP, assay = 'RNA')
seurObj_CITE_SP <- RunPCA(
  seurObj_CITE_SP,
  assay = 'RNA',
  npcs = 50,
  reduction.name = 'pca_rna_SP'
)

In [None]:
# Correct the RNA PCA embedding across samples with reducedMNN() and store the
# corrected coordinates as reduction "mnn_rna_SP".
MNN <- reducedMNN(
  seurObj_CITE_SP[["pca_rna_SP"]]@cell.embeddings,
  batch = seurObj_CITE_SP$sample,
  BPPARAM = MulticoreParam(workers = 12), # parallelisation
  BNPARAM = HnswParam()
)
seurObj_CITE_SP[["mnn_rna_SP"]] <- CreateDimReducObject(
  embeddings = MNN$corrected,
  assay = "RNA",
  key = "mnn_"
)

In [None]:
# Elbow heuristic for the number of components to use.
# Each component's standard deviation is expressed as a percentage of the
# summed standard deviations; the last component whose drop to the next one
# exceeds `min_drop` percentage points marks the elbow.
#
# stdev:    numeric vector of per-component standard deviations.
# min_drop: minimum drop (percentage points) between consecutive components.
# Returns the index of the component that follows the last such drop.
# Stops with an error if no drop exceeds `min_drop`.
select_elbow_dims <- function(stdev, min_drop = 0.1) {
  pct <- stdev / sum(stdev) * 100
  # -diff(pct) is pct[i] - pct[i + 1]; this replaces the original
  # `1:length(x)-1` indexing, which only worked because index 0 is dropped.
  drops <- -diff(pct)
  elbow <- which(drops > min_drop)
  if (length(elbow) == 0) {
    stop("no drop larger than ", min_drop,
         " between consecutive components", call. = FALSE)
  }
  max(elbow) + 1
}

dim_adt_SP <- select_elbow_dims(seurObj_CITE_SP@reductions$pca_adt_SP@stdev)
dim_adt_SP

dim_rna_SP <- select_elbow_dims(seurObj_CITE_SP@reductions$pca_rna_SP@stdev)
dim_rna_SP

# Weighted nearest-neighbour graph built from the MNN-corrected ADT and RNA
# embeddings, using the selected number of dimensions for each modality.
seurObj_CITE_SP <- FindMultiModalNeighbors(
  seurObj_CITE_SP,
  reduction.list = list('mnn_adt_SP', 'mnn_rna_SP'),
  dims.list = list(1:dim_adt_SP, 1:dim_rna_SP)
)

In [None]:
# UMAP on the weighted nearest-neighbour graph; fixed seed for reproducibility.
seurObj_CITE_SP <- RunUMAP(
  seurObj_CITE_SP,
  nn.name = "weighted.nn",
  seed.use = 123,
  reduction.name = "umap_wnn_SP",
  reduction.key = "wnnUMAP_"
)

In [None]:
options(repr.plot.width=9, repr.plot.height=7)

# WNN UMAP of the subset, coloured by annotation.
# TRUE instead of T: T is an ordinary variable that can be reassigned.
DimPlot(
  seurObj_CITE_SP,
  reduction = 'umap_wnn_SP',
  group.by = 'anno_CITE_4v2',
  shuffle = TRUE,
  pt.size = 0.5,
  cols = pal16
)

In [None]:
# Convert the Seurat subset to a SingleCellExperiment object for use with slingshot.
SCE_SP <- as.SingleCellExperiment(seurObj_CITE_SP)

In [None]:
# Attach both WNN UMAP embeddings (the one carried over in reduction
# "umap_wnn" and the one recomputed on the subset) to the SCE object.
reducedDim(SCE_SP, type = 'UMAP_WNN') <-
  seurObj_CITE_SP[["umap_wnn"]]@cell.embeddings
reducedDim(SCE_SP, type = 'UMAP_WNN_SP') <-
  seurObj_CITE_SP[["umap_wnn_SP"]]@cell.embeddings

In [None]:
# Infer the lineage structure on the subset UMAP with a fixed start cluster
# (DP_pos_sel) and four fixed end clusters.
lineages_SP_StEnd_mnn2 <- getLineages(
  reducedDims(SCE_SP)$UMAP_WNN_SP,
  dist.method = 'mnn',
  clusterLabels = seurObj_CITE_SP$anno_CITE_4v2,
  start.clus = 'DP_pos_sel',
  end.clus = c(
    'SP_CD4_mature',
    'SP_CD8_mature',
    'SP_Treg_mature',
    'CD8aaII_mature'
  )
)


In [None]:
# Fit the curves along the inferred lineages.
curves_SP_mnn2 <- getCurves(
  lineages_SP_StEnd_mnn2,
  extend = "n",
  stretch = 0
)


In [None]:
options(repr.plot.width=8, repr.plot.height=8)

# clusters_SP maps each annotation label to an index into pal16. It is not
# created anywhere in the code visible in this notebook, so build it from the
# annotation levels (in level order) when it does not exist yet.
if (!exists("clusters_SP")) {
  clusters_SP <- local({
    anno_levels <- levels(factor(seurObj_CITE_SP$anno_CITE_4v2))
    setNames(seq_along(anno_levels), anno_levels)
  })
}

# Cells on the subset UMAP coloured by annotation, with the lineage tree
# (black) and the fitted curves (blue) drawn on top.
plot(
  reducedDims(SCE_SP)$UMAP_WNN_SP,
  col = pal16[as.numeric(clusters_SP[as.character(seurObj_CITE_SP$anno_CITE_4v2)])],
  pch = 16, cex = 0.5, main = "Start + Ends - mnn"
)
lines(SlingshotDataSet(lineages_SP_StEnd_mnn2), col = 'black', show.constraints = TRUE)
lines(SlingshotDataSet(curves_SP_mnn2), col = 'blue', show.constraints = TRUE)

In [None]:
# Show the cluster sequence of each lineage.
slingLineages(curves_SP_mnn2)

In [None]:
par(mfrow = c(3, 2))
options(repr.plot.width=10, repr.plot.height=15)

pseudotime_SP <- slingPseudotime(curves_SP_mnn2)
names_lineages_SP <- colnames(pseudotime_SP)

# colors_pseudotime is the 100-colour ramp indexed by the 100 pseudotime bins
# below. It is not created anywhere in the code visible in this notebook, so
# fall back to a viridis ramp when it does not exist yet.
if (!exists("colors_pseudotime")) {
  colors_pseudotime <- viridis(100)
}

# One panel per lineage: cells coloured by binned pseudotime, the lineage's
# own curve in black and the full lineage tree as dashed grey lines.
for (i in names_lineages_SP) {
  # Cells without pseudotime in this lineage get an NA colour.
  plotcol <- colors_pseudotime[cut(pseudotime_SP[, i], breaks = 100)]
  plot(reducedDims(SCE_SP)$UMAP_WNN_SP, col = plotcol,
       pch = 16, cex = 0.5,
       main = paste(i, '-', tail(slingLineages(curves_SP_mnn2)[[i]], 1)))
  lines(slingCurves(curves_SP_mnn2)[[i]], col = 'black') #plots only the relevant curve
  lines(SlingshotDataSet(lineages_SP_StEnd_mnn2), col = 'grey30',
        show.constraints = TRUE, type = "l", lty = "dashed")
}

In [None]:
# Pseudotime matrix as a data frame (one column per lineage) with the
# annotation of each cell added, matched by cell name.
pseudotime_SP.df <- data.frame(pseudotime_SP)
pseudotime_SP.df$anno <-
  seurObj_CITE_SP$anno_CITE_4v2[rownames(pseudotime_SP.df)]

head(pseudotime_SP.df)

In [None]:
options(repr.plot.width=10, repr.plot.height=8)

# One jitter plot per lineage: pseudotime on x, annotation on y.
# The lineage columns are the first length(names_lineages_SP) columns of the
# data frame; seq_along() avoids the 1:0 pitfall of 1:length().
for (lineage in colnames(pseudotime_SP.df)[seq_along(names_lineages_SP)]) {
  print(
    # .data[[lineage]] selects the column inside the plot data instead of
    # indexing the data frame from outside aes().
    ggplot(pseudotime_SP.df, aes(x = .data[[lineage]], y = anno, colour = anno)) +
      geom_jitter(size = 1) +
      theme_classic() +
      xlab("Pseudotime") + ylab("Celltype") +
      scale_color_manual(values = pal16) +
      ggtitle(paste("Cells of", lineage, "ordered by pseudotime", sep = ' '))
  )
}

Add pseudotimes to main object:

In [None]:
# Metadata column -> slingshot lineage column that is stored in it.
# NOTE(review): the Lineage1..5 -> cell type pairing is hard-coded; verify it
# against the slingLineages(curves_SP_mnn2) output whenever the trajectory is re-run.
spt_columns <- c(
  spt_spca_SP_Treg = "Lineage1",
  spt_spca_SP_CD8aa = "Lineage2",
  spt_spca_SP_CD4 = "Lineage3",
  spt_spca_SP_CD8 = "Lineage4",
  spt_spca_SP_TregCD8 = "Lineage5"
)
stopifnot(all(spt_columns %in% colnames(pseudotime_SP.df)))

for (spt_col in names(spt_columns)) {
  # Initialise with a numeric NA (not the string 'NA') so the column stays
  # numeric and is.na() works for cells outside the trajectory.
  seurObj_CITE[[spt_col]] <- NA_real_
  seurObj_CITE@meta.data[rownames(pseudotime_SP.df), spt_col] <-
    pseudotime_SP.df[[spt_columns[[spt_col]]]]
}

In [None]:
# Convert the pseudotime metadata columns to numeric.
# The original positional range 230:242 is kept for backward compatibility,
# and the five pseudotime columns added above are also selected by name so
# they are converted even if the metadata layout shifts.
# NOTE(review): positions 230:242 are hard-coded; confirm they still point at
# pseudotime columns only, or replace them with explicit column names.
meta_names <- colnames(seurObj_CITE@meta.data)
numeric_cols <- union(
  meta_names[intersect(230:242, seq_along(meta_names))],
  intersect(
    c("spt_spca_SP_Treg", "spt_spca_SP_CD8aa", "spt_spca_SP_CD4",
      "spt_spca_SP_CD8", "spt_spca_SP_TregCD8"),
    meta_names
  )
)
for (col in numeric_cols) {
  seurObj_CITE@meta.data[[col]] <- as.numeric(seurObj_CITE@meta.data[[col]])
}

#### Reassessing lineage annotations

According to the binned CMA mapping, CD4hi8lo and CD8 immature cells map to the cortex, whereas CD4 immature cells already map to the medulla. This suggests that either CD4 and CD8 lineage cells behave differently after commitment, or the CD4hi8lo annotation contains CD4-committed cells.

In [None]:
# Subset to the populations relevant for the CD4 vs. CD8 decision:
# the two DP populations and the immature/semimature/mature CD4 and CD8 SP cells.
seurObj_CITE_4vs8 <- subset(seurObj_CITE, subset = anno_CITE_4v2 %in% c('DP_pos_sel','DP_4hi8lo','SP_CD4_immature', 'SP_CD4_semimature', 'SP_CD4_mature', 'SP_CD8_immature', 'SP_CD8_semimature', 'SP_CD8_mature'))

In [None]:
# Protein (ADTdsb) modality of the CD4/CD8 subset: variable features, scaling
# and a 50-component PCA stored as reduction "pca_adt_4vs8".
seurObj_CITE_4vs8 <- FindVariableFeatures(seurObj_CITE_4vs8, assay = 'ADTdsb')
seurObj_CITE_4vs8 <- ScaleData(seurObj_CITE_4vs8, assay = 'ADTdsb')
seurObj_CITE_4vs8 <- RunPCA(
  seurObj_CITE_4vs8,
  assay = 'ADTdsb',
  npcs = 50,
  reduction.name = 'pca_adt_4vs8'
)

In [None]:
# Correct the ADT PCA embedding across samples with reducedMNN() and store the
# corrected coordinates as reduction "mnn_adt_4vs8".
MNN <- reducedMNN(
  seurObj_CITE_4vs8[["pca_adt_4vs8"]]@cell.embeddings,
  batch = seurObj_CITE_4vs8$sample,
  BPPARAM = MulticoreParam(workers = 12), # parallelisation
  BNPARAM = HnswParam()
)
seurObj_CITE_4vs8[["mnn_adt_4vs8"]] <- CreateDimReducObject(
  embeddings = MNN$corrected,
  assay = "ADTdsb",
  key = "mnn_"
)

In [None]:
# RNA modality of the CD4/CD8 subset: variable features, scaling and a
# 50-component PCA stored as reduction "pca_rna_4vs8".
seurObj_CITE_4vs8 <- FindVariableFeatures(seurObj_CITE_4vs8, assay = 'RNA')
seurObj_CITE_4vs8 <- ScaleData(seurObj_CITE_4vs8, assay = 'RNA')
seurObj_CITE_4vs8 <- RunPCA(
  seurObj_CITE_4vs8,
  assay = 'RNA',
  npcs = 50,
  reduction.name = 'pca_rna_4vs8'
)

In [None]:
# Correct the RNA PCA embedding across samples with reducedMNN() and store the
# corrected coordinates as reduction "mnn_rna_4vs8".
MNN <- reducedMNN(
  seurObj_CITE_4vs8[["pca_rna_4vs8"]]@cell.embeddings,
  batch = seurObj_CITE_4vs8$sample,
  BPPARAM = MulticoreParam(workers = 12), # parallelisation
  BNPARAM = HnswParam()
)
seurObj_CITE_4vs8[["mnn_rna_4vs8"]] <- CreateDimReducObject(
  embeddings = MNN$corrected,
  assay = "RNA",
  key = "mnn_"
)

In [None]:
# Elbow heuristic for the number of components to use.
# Each component's standard deviation is expressed as a percentage of the
# summed standard deviations; the last component whose drop to the next one
# exceeds `min_drop` percentage points marks the elbow.
#
# stdev:    numeric vector of per-component standard deviations.
# min_drop: minimum drop (percentage points) between consecutive components.
# Returns the index of the component that follows the last such drop.
# Stops with an error if no drop exceeds `min_drop`.
select_elbow_dims <- function(stdev, min_drop = 0.1) {
  pct <- stdev / sum(stdev) * 100
  # -diff(pct) is pct[i] - pct[i + 1]; this replaces the original
  # `1:length(x)-1` indexing, which only worked because index 0 is dropped.
  drops <- -diff(pct)
  elbow <- which(drops > min_drop)
  if (length(elbow) == 0) {
    stop("no drop larger than ", min_drop,
         " between consecutive components", call. = FALSE)
  }
  max(elbow) + 1
}

dim_adt_4vs8 <- select_elbow_dims(seurObj_CITE_4vs8@reductions$pca_adt_4vs8@stdev)
dim_adt_4vs8

dim_rna_4vs8 <- select_elbow_dims(seurObj_CITE_4vs8@reductions$pca_rna_4vs8@stdev)
dim_rna_4vs8

# Weighted nearest-neighbour graph built from the MNN-corrected ADT and RNA
# embeddings, using the selected number of dimensions for each modality.
seurObj_CITE_4vs8 <- FindMultiModalNeighbors(
  seurObj_CITE_4vs8,
  reduction.list = list('mnn_adt_4vs8', 'mnn_rna_4vs8'),
  dims.list = list(1:dim_adt_4vs8, 1:dim_rna_4vs8)
)

In [None]:
# UMAP on the weighted nearest-neighbour graph; fixed seed for reproducibility.
seurObj_CITE_4vs8 <- RunUMAP(
  seurObj_CITE_4vs8,
  nn.name = "weighted.nn",
  seed.use = 123,
  reduction.name = "umap_wnn_4vs8",
  reduction.key = "wnnUMAP_"
)

In [None]:
# Supervised PCA guided by the 'wsnn' graph.
# NOTE(review): the resulting reduction is not used in the cells visible below.
seurObj_CITE_4vs8 <- RunSPCA(seurObj_CITE_4vs8, graph='wsnn')

In [None]:
options(repr.plot.width=9, repr.plot.height=7)

# WNN UMAP of the CD4/CD8 subset, coloured by the original annotation.
# TRUE instead of T: T is an ordinary variable that can be reassigned.
DimPlot(
  seurObj_CITE_4vs8,
  reduction = 'umap_wnn_4vs8',
  group.by = 'anno_CITE_4v2',
  shuffle = TRUE,
  pt.size = 0.5,
  cols = pal8
)

In [None]:
options(repr.plot.width=16, repr.plot.height=7)

# CD4 and CD8 lineage pseudotime shown on the subset UMAP.
FeaturePlot(
  seurObj_CITE_4vs8,
  reduction = 'umap_wnn_4vs8',
  features = c('spt_spca_SP_CD4', 'spt_spca_SP_CD8'),
  pt.size = 0.5,
  cols = viridis(100)
)

In [None]:
options(repr.plot.width=8, repr.plot.height=5)

# CD8 pseudotime per annotation; colour marks whether the cell also has a
# CD4 pseudotime value.
ggplot(
  seurObj_CITE_4vs8@meta.data,
  aes(
    x = as.numeric(spt_spca_SP_CD8),
    y = anno_CITE_4v2,
    colour = !is.na(as.numeric(spt_spca_SP_CD4))
  )
) +
  geom_jitter(size = 0.3, alpha = 0.5) +
  theme_classic() +
  labs(
    x = "Pseudotime",
    y = "Celltype",
    title = 'Cells ordered according to CD8 developmental pseudotime',
    colour = 'Also included in \nCD4 pseudotime'
  )

Pseudotime analysis and comparison with the annotations show that several cells annotated as CD4 lineage are included in the CD8 lineage pseudotime and vice versa. On the other hand, some cells in the DP_4hi8lo subset have no lineage pseudotime at all, or a pseudotime for only one of the two lineages. We can distinguish these cells and check whether the annotations are indeed appropriate.

DP_4hi8lo subsets:

1: early pseudotime for both lineages

2: late pseudotime for both lineages

3: pseudotime CD8 >> CD4

4: only CD4 pseudotime

5: only CD8 pseudotime

6: no pseudotime

In [None]:
# Split the 'DP_4hi8lo' label into six subgroups according to the CD4/CD8
# lineage pseudotimes of each cell; all other labels are returned unchanged.
#   _2: pseudotime in both lineages (default when neither _3 nor _1 applies)
#   _3: both, CD8 pseudotime > 4 and CD4 pseudotime < 3
#   _1: both, CD8 and CD4 pseudotime < 3.5
#   _4: CD4 pseudotime only   _5: CD8 pseudotime only   _6: neither
#
# anno:   character vector of annotation labels.
# pt_cd4: CD4 lineage pseudotime per cell (NA when not in the lineage).
# pt_cd8: CD8 lineage pseudotime per cell (NA when not in the lineage).
# Returns a character vector of the same length as `anno`.
split_dp4hi8lo <- function(anno, pt_cd4, pt_cd8) {
  in_cd4 <- !is.na(pt_cd4)
  in_cd8 <- !is.na(pt_cd8)
  out <- anno
  out[which(anno == 'DP_4hi8lo' & in_cd8 & in_cd4)] <- 'DP_4hi8lo_2'
  # The next two steps only refine cells that are currently 'DP_4hi8lo_2'.
  out[which(out == 'DP_4hi8lo_2' & pt_cd8 > 4 & pt_cd4 < 3)] <- 'DP_4hi8lo_3'
  out[which(out == 'DP_4hi8lo_2' & pt_cd8 < 3.5 & pt_cd4 < 3.5)] <- 'DP_4hi8lo_1'
  out[which(out == 'DP_4hi8lo' & !in_cd8 & in_cd4)] <- 'DP_4hi8lo_4'
  out[which(out == 'DP_4hi8lo' & in_cd8 & !in_cd4)] <- 'DP_4hi8lo_5'
  out[which(out == 'DP_4hi8lo' & !in_cd8 & !in_cd4)] <- 'DP_4hi8lo_6'
  out
}

seurObj_CITE_4vs8$anno_CITE_4v4 <- split_dp4hi8lo(
  as.character(seurObj_CITE_4vs8$anno_CITE_4v2),
  seurObj_CITE_4vs8$spt_spca_SP_CD4,
  seurObj_CITE_4vs8$spt_spca_SP_CD8
)

In [None]:
# Number of cells per label after splitting DP_4hi8lo into subgroups.
table(seurObj_CITE_4vs8$anno_CITE_4v4)

In [None]:
options(repr.plot.width=9, repr.plot.height=7)

# Original annotation on the subset UMAP, for comparison with the refined labels.
# TRUE instead of T: T is an ordinary variable that can be reassigned.
DimPlot(
  seurObj_CITE_4vs8,
  reduction = 'umap_wnn_4vs8',
  group.by = 'anno_CITE_4v2',
  shuffle = TRUE,
  pt.size = 0.5,
  cols = pal8
)

In [None]:
options(repr.plot.width=9, repr.plot.height=7)

# Refined annotation (DP_4hi8lo split into subgroups) on the subset UMAP.
# TRUE instead of T: T is an ordinary variable that can be reassigned.
DimPlot(
  seurObj_CITE_4vs8,
  reduction = 'umap_wnn_4vs8',
  group.by = 'anno_CITE_4v4',
  shuffle = TRUE,
  pt.size = 0.5,
  cols = brewer.paired(13)
)

In [None]:
options(repr.plot.width=8, repr.plot.height=5)

# CD4 pseudotime per refined annotation; colour marks whether the cell also
# has a CD8 pseudotime value.
ggplot(
  seurObj_CITE_4vs8@meta.data,
  aes(
    x = as.numeric(spt_spca_SP_CD4),
    y = anno_CITE_4v4,
    colour = !is.na(as.numeric(spt_spca_SP_CD8))
  )
) +
  geom_jitter(size = 0.3, alpha = 0.5) +
  theme_classic() +
  labs(
    x = "Pseudotime",
    y = "Celltype",
    title = 'Cells ordered according to CD4 developmental pseudotime',
    colour = 'Also included in \nCD8 pseudotime'
  )

In [None]:
options(repr.plot.width=8, repr.plot.height=5)

# CD8 pseudotime per refined annotation; colour marks whether the cell also
# has a CD4 pseudotime value.
ggplot(
  seurObj_CITE_4vs8@meta.data,
  aes(
    x = as.numeric(spt_spca_SP_CD8),
    y = anno_CITE_4v4,
    colour = !is.na(as.numeric(spt_spca_SP_CD4))
  )
) +
  geom_jitter(size = 0.3, alpha = 0.5) +
  theme_classic() +
  labs(
    x = "Pseudotime",
    y = "Celltype",
    title = 'Cells ordered according to CD8 developmental pseudotime',
    colour = 'Also included in \nCD4 pseudotime'
  )

We label cells that are annotated as one lineage but have been included in the pseudotime of the other as 'ambiguous'.

In [None]:
# SP cells annotated as one lineage but carrying a pseudotime value of the
# other lineage are relabelled as ambiguous.
cd4_in_cd8_lineage <-
  seurObj_CITE_4vs8$anno_CITE_4v4 %in% c('SP_CD4_immature', 'SP_CD4_semimature', 'SP_CD4_mature') &
  !is.na(seurObj_CITE_4vs8$spt_spca_SP_CD8)
seurObj_CITE_4vs8$anno_CITE_4v4[cd4_in_cd8_lineage] <- 'SP_CD4_ambig'

cd8_in_cd4_lineage <-
  seurObj_CITE_4vs8$anno_CITE_4v4 %in% c('SP_CD8_immature', 'SP_CD8_semimature', 'SP_CD8_mature') &
  !is.na(seurObj_CITE_4vs8$spt_spca_SP_CD4)
seurObj_CITE_4vs8$anno_CITE_4v4[cd8_in_cd4_lineage] <- 'SP_CD8_ambig'


In [None]:
# Order the annotation levels along the developmental sequence.
# Levels are given by name rather than by position in the alphabetically
# sorted level vector, so an empty subgroup cannot shift the order or
# introduce NA levels. Labels not listed here are appended alphabetically.
anno_4v4_order <- c(
  'DP_pos_sel',
  paste0('DP_4hi8lo_', 1:6),
  'SP_CD4_immature', 'SP_CD4_semimature', 'SP_CD4_mature',
  'SP_CD8_immature', 'SP_CD8_semimature', 'SP_CD8_mature',
  'SP_CD4_ambig', 'SP_CD8_ambig'
)
anno_4v4_present <- unique(as.character(seurObj_CITE_4vs8$anno_CITE_4v4))
levs <- c(
  intersect(anno_4v4_order, anno_4v4_present),
  sort(setdiff(anno_4v4_present, anno_4v4_order))
)
seurObj_CITE_4vs8$anno_CITE_4v4 <- factor(
  as.character(seurObj_CITE_4vs8$anno_CITE_4v4),
  levels = levs
)

In [None]:
options(repr.plot.width=9, repr.plot.height=7)

# Refined annotation including the ambiguous groups on the subset UMAP.
# TRUE instead of T: T is an ordinary variable that can be reassigned.
DimPlot(
  seurObj_CITE_4vs8,
  reduction = 'umap_wnn_4vs8',
  group.by = 'anno_CITE_4v4',
  shuffle = TRUE,
  pt.size = 0.5,
  cols = brewer.paired(15)
)

We can carry out DGE analyses to determine whether the CD4hiCD8lo subsets already show distinct gene expression patterns that would identify them as CD4- or CD8-lineage cells.

In [None]:
# Use the refined annotation as cell identities for the FindMarkers calls below
# and make RNA the default assay.
Idents(seurObj_CITE_4vs8) <- seurObj_CITE_4vs8$anno_CITE_4v4
DefaultAssay(seurObj_CITE_4vs8) <- 'RNA' #DGE on RNA level

In [None]:
# DP_4hi8lo_1 (ident.1) vs. DP_pos_sel (ident.2), ROC test;
# keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DPpossel_DP4hi8lo_1 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_1',
  ident.2 = 'DP_pos_sel',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DPpossel_DP4hi8lo_1

In [None]:
options(repr.plot.width=22, repr.plot.height=7)

# Expression of the DP_pos_sel vs. DP_4hi8lo_1 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DPpossel_DP4hi8lo_1), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between DP_pos_sel and DP_4hi8lo_1')

In [None]:
# DP_4hi8lo_2 (ident.1) vs. DP_4hi8lo_1 (ident.2), ROC test;
# keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_1_2 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_2',
  ident.2 = 'DP_4hi8lo_1',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_1_2

In [None]:
options(repr.plot.width=6, repr.plot.height=7)

# Expression of the DP_4hi8lo_1 vs. DP_4hi8lo_2 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_1_2), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_1 and DP_4hi8lo_2')

In [None]:
# DP_4hi8lo_3 (ident.1) vs. DP_4hi8lo_2 (ident.2), ROC test;
# keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_2_3 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_3',
  ident.2 = 'DP_4hi8lo_2',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_2_3

In [None]:
options(repr.plot.width=8, repr.plot.height=7)

# Expression of the DP_4hi8lo_2 vs. DP_4hi8lo_3 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_2_3), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_2 and DP_4hi8lo_3')

In [None]:
# DP_4hi8lo_3 (ident.1) vs. DP_4hi8lo_1 (ident.2), ROC test;
# keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_1_3 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_3',
  ident.2 = 'DP_4hi8lo_1',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_1_3

In [None]:
# DP_4hi8lo_4 (ident.1) vs. DP_4hi8lo_2 (ident.2), ROC test;
# keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_2_4 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_4',
  ident.2 = 'DP_4hi8lo_2',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_2_4

In [None]:
options(repr.plot.width=6, repr.plot.height=7)

# Expression of the DP_4hi8lo_2 vs. DP_4hi8lo_4 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_2_4), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_2 and DP_4hi8lo_4')

In [None]:
# DP_4hi8lo_5 (ident.1) vs. DP_4hi8lo_2 (ident.2), ROC test;
# keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_2_5 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_5',
  ident.2 = 'DP_4hi8lo_2',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_2_5

In [None]:
options(repr.plot.width=6, repr.plot.height=7)

# Expression of the DP_4hi8lo_2 vs. DP_4hi8lo_5 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_2_5), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_2 and DP_4hi8lo_5')

In [None]:
# DP_4hi8lo_4 (ident.1) vs. DP_4hi8lo_1 and DP_4hi8lo_2 combined (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_1_2_4 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_4',
  ident.2 = c('DP_4hi8lo_1', 'DP_4hi8lo_2'),
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_1_2_4

In [None]:
options(repr.plot.width=10, repr.plot.height=7)

# Expression of the DP_4hi8lo_1+2 vs. DP_4hi8lo_4 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_1_2_4), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between DP_4hi8lo_1+2 and DP_4hi8lo_4')

In [None]:
# DP_4hi8lo_5 (ident.1) vs. DP_4hi8lo_1 and DP_4hi8lo_2 combined (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_1_2_5 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_5',
  ident.2 = c('DP_4hi8lo_1', 'DP_4hi8lo_2'),
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_1_2_5

In [None]:
# SP_CD4_immature (ident.1) vs. DP_4hi8lo_4 (ident.2), ROC test;
# keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_4_SP4im <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'SP_CD4_immature',
  ident.2 = 'DP_4hi8lo_4',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_4_SP4im

In [None]:
options(repr.plot.width=6, repr.plot.height=7)

# Expression of the DP_4hi8lo_4 vs. SP_CD4_immature markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_4_SP4im), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_4 and SP_CD4_immature')

In [None]:
# SP_CD8_immature (ident.1) vs. DP_4hi8lo_5 (ident.2), ROC test;
# keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_5_SP8im <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'SP_CD8_immature',
  ident.2 = 'DP_4hi8lo_5',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_5_SP8im

In [None]:
options(repr.plot.width=8, repr.plot.height=7)

# Expression of the DP_4hi8lo_5 vs. SP_CD8_immature markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_5_SP8im), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_5 and SP_CD8_immature')

In [None]:
# DP_4hi8lo_4 (ident.1) vs. DP_4hi8lo_5 (ident.2), ROC test;
# keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_4_5 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_4',
  ident.2 = 'DP_4hi8lo_5',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_4_5

In [None]:
options(repr.plot.width=8, repr.plot.height=7)

# Expression of the DP_4hi8lo_4 vs. DP_4hi8lo_5 markers across all groups.
# FALSE instead of F (F can be reassigned); the title no longer carries the
# stray 'n' in front of DP_4hi8lo_5.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_4_5), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_4 and DP_4hi8lo_5')

In [None]:
# Unique set of marker genes collected from the RNA-level comparisons.
# NOTE(review): DP4hi8lo_1_3 and DP4hi8lo_1_2_5 are not part of this list -
# confirm that this is intended.
DEgenes <- unique(c(
  rownames(DPpossel_DP4hi8lo_1),
  rownames(DP4hi8lo_1_2),
  rownames(DP4hi8lo_2_3),
  rownames(DP4hi8lo_2_4),
  rownames(DP4hi8lo_2_5),
  rownames(DP4hi8lo_1_2_4),
  rownames(DP4hi8lo_4_SP4im),
  rownames(DP4hi8lo_5_SP8im),
  rownames(DP4hi8lo_4_5)
))

In [None]:
# Switch the default assay to the protein data. The comparisons below reuse
# the variable names of the RNA-level results and therefore overwrite them.
DefaultAssay(seurObj_CITE_4vs8) <- 'ADTdsb' #DE analysis on protein level

In [None]:
# Current default assay: DP_4hi8lo_1 (ident.1) vs. DP_pos_sel (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DPpossel_DP4hi8lo_1 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_1',
  ident.2 = 'DP_pos_sel',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DPpossel_DP4hi8lo_1

In [None]:
options(repr.plot.width=5, repr.plot.height=7)

# Expression of the DP_pos_sel vs. DP_4hi8lo_1 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DPpossel_DP4hi8lo_1), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between DP_pos_sel and DP_4hi8lo_1')

In [None]:
# Current default assay: DP_4hi8lo_2 (ident.1) vs. DP_4hi8lo_1 (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_1_2 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_2',
  ident.2 = 'DP_4hi8lo_1',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_1_2

In [None]:
options(repr.plot.width=5, repr.plot.height=7)

# Expression of the DP_4hi8lo_1 vs. DP_4hi8lo_2 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_1_2), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_1 and DP_4hi8lo_2')

In [None]:
# Current default assay: DP_4hi8lo_3 (ident.1) vs. DP_4hi8lo_2 (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_2_3 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_3',
  ident.2 = 'DP_4hi8lo_2',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_2_3

In [None]:
options(repr.plot.width=5, repr.plot.height=7)

# Expression of the DP_4hi8lo_2 vs. DP_4hi8lo_3 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_2_3), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_2 and DP_4hi8lo_3')

In [None]:
# Current default assay: DP_4hi8lo_3 (ident.1) vs. DP_4hi8lo_1 (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_1_3 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_3',
  ident.2 = 'DP_4hi8lo_1',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_1_3

In [None]:
# Current default assay: DP_4hi8lo_4 (ident.1) vs. DP_4hi8lo_2 (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_2_4 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_4',
  ident.2 = 'DP_4hi8lo_2',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_2_4

In [None]:
options(repr.plot.width=5, repr.plot.height=7)

# Expression of the DP_4hi8lo_2 vs. DP_4hi8lo_4 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_2_4), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_2 and DP_4hi8lo_4')

In [None]:
# Current default assay: DP_4hi8lo_5 (ident.1) vs. DP_4hi8lo_2 (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_2_5 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_5',
  ident.2 = 'DP_4hi8lo_2',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_2_5

In [None]:
# Current default assay: DP_4hi8lo_4 (ident.1) vs. DP_4hi8lo_1 and
# DP_4hi8lo_2 combined (ident.2), ROC test; keep |avg_log2FC| > 0.8,
# sorted by ascending log2FC.
DP4hi8lo_1_2_4 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_4',
  ident.2 = c('DP_4hi8lo_1', 'DP_4hi8lo_2'),
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_1_2_4

In [None]:
options(repr.plot.width=6, repr.plot.height=7)

# Expression of the DP_4hi8lo_1+2 vs. DP_4hi8lo_4 markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_1_2_4), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between DP_4hi8lo_1+2 and DP_4hi8lo_4')

In [None]:
# Current default assay: DP_4hi8lo_5 (ident.1) vs. DP_4hi8lo_1 and
# DP_4hi8lo_2 combined (ident.2), ROC test; keep |avg_log2FC| > 0.8,
# sorted by ascending log2FC.
DP4hi8lo_1_2_5 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_5',
  ident.2 = c('DP_4hi8lo_1', 'DP_4hi8lo_2'),
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_1_2_5

In [None]:
# Current default assay: SP_CD4_immature (ident.1) vs. DP_4hi8lo_4 (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_4_SP4im <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'SP_CD4_immature',
  ident.2 = 'DP_4hi8lo_4',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_4_SP4im

In [None]:
# Current default assay: SP_CD8_immature (ident.1) vs. DP_4hi8lo_5 (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_5_SP8im <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'SP_CD8_immature',
  ident.2 = 'DP_4hi8lo_5',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_5_SP8im

In [None]:
options(repr.plot.width=6, repr.plot.height=7)

# Expression of the DP_4hi8lo_5 vs. SP_CD8_immature markers across all groups.
# FALSE instead of F: F is an ordinary variable that can be reassigned.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_5_SP8im), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_distiller(palette = 'Spectral') +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_5 and SP_CD8_immature')

In [None]:
# Current default assay: DP_4hi8lo_4 (ident.1) vs. DP_4hi8lo_5 (ident.2),
# ROC test; keep |avg_log2FC| > 0.8, sorted by ascending log2FC.
DP4hi8lo_4_5 <- FindMarkers(
  seurObj_CITE_4vs8,
  ident.1 = 'DP_4hi8lo_4',
  ident.2 = 'DP_4hi8lo_5',
  test.use = 'roc'
) %>%
  filter(abs(avg_log2FC) > 0.8) %>%
  .[order(.$avg_log2FC), ]
DP4hi8lo_4_5

In [None]:
options(repr.plot.width=8, repr.plot.height=7)

# Expression of the DP_4hi8lo_4 vs. DP_4hi8lo_5 markers across all groups
# (viridis colour scale). FALSE instead of F (F can be reassigned); the title
# no longer carries the stray 'n' in front of DP_4hi8lo_5.
DotPlot(seurObj_CITE_4vs8, features = rownames(DP4hi8lo_4_5), group.by = 'anno_CITE_4v4', scale = FALSE) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_color_viridis() +
  labs(title = 'Significantly DE genes between \nDP_4hi8lo_4 and DP_4hi8lo_5')

In [None]:
options(repr.plot.width=18, repr.plot.height=8)

# Violin plots (no individual points) of selected markers per identity class.
VlnPlot(
  seurObj_CITE_4vs8,
  features = c('CD1a', 'CD4', 'CD8', 'CD27', 'HLA.ABC', 'CD127', 'CD3'),
  pt.size = 0,
  ncol = 4
)

It seems that the 4hi8lo subgroup 4 is already CD4 immature, so to avoid heterogeneity within the 4hi8lo population it is best to adjust the annotations.

In [None]:
# New annotation: copy of anno_CITE_4v2 in which the DP_4hi8lo_4 cells
# identified in the CD4/CD8 subset are relabelled as SP_CD4_immature.
seurObj_CITE$anno_CITE_4v5 <- as.character(seurObj_CITE$anno_CITE_4v2)
seurObj_CITE$anno_CITE_4v5[WhichCells(seurObj_CITE_4vs8, idents = 'DP_4hi8lo_4')] <- 'SP_CD4_immature'

# levels_new is not created anywhere in the code visible in this notebook.
# Fall back to the level order of anno_CITE_4v2, which is sufficient because
# the relabelling above only reuses an existing label.
if (!exists("levels_new")) {
  levels_new <- if (is.factor(seurObj_CITE$anno_CITE_4v2)) {
    levels(seurObj_CITE$anno_CITE_4v2)
  } else {
    sort(unique(as.character(seurObj_CITE$anno_CITE_4v2)))
  }
}
# factor() silently turns labels that are missing from `levels` into NA.
labels_without_level <- setdiff(na.omit(unique(seurObj_CITE$anno_CITE_4v5)), levels_new)
if (length(labels_without_level) > 0) {
  warning("labels missing from levels_new will become NA: ",
          paste(labels_without_level, collapse = ", "), call. = FALSE)
}
seurObj_CITE$anno_CITE_4v5 <- factor(seurObj_CITE$anno_CITE_4v5, levels = levels_new)

In [None]:
# Number of cells per label in the adjusted annotation.
table(seurObj_CITE$anno_CITE_4v5)