# Part 9: Analysis of Unconventional T cells

In this document, we focus on the Tgd and other unconventional T cell clusters subsetted from the main CD8 dataset. To reproduce the analysis, please download the Tgd Seurat object available at Zenodo: [https://doi.org/10.5281/zenodo.14222418](https://doi.org/10.5281/zenodo.14222418)

In [None]:
.libPaths("~/R/x86_64-pc-linux-gnu-library/4.4/")
library(GEOquery)
source("diabetes_analysis_v07.R")

## Load data

In [None]:
## CD8 All
cd8_l1_full_filt  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
cd8_l1_full_filt

In [None]:
## CD8 Tgd
gd  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L2/cd8_l2_unc.rds")

# Tr3-56 published data

We download the published bulk RNA-seq dataset characterizing TR3-56 cells from [Terrazzano et al., 2020](https://www.nature.com/articles/s42255-020-0173-1).

In [None]:
gset <- GEOquery::getGEO('GSE106082')

In [None]:
gset <- readRDS("../data//geo//tr356.rds")

In [None]:
metadata <- data.frame(geo_id = gset$GSE106082_series_matrix.txt.gz$geo_accession,
                       cell_type = gset$GSE106082_series_matrix.txt.gz$`characteristics_ch1.1`
                       )

We will load the expression matrix and use it together with the metadata to identify potential TR3-56 cells with SingleR.

In [None]:
mtx_tr356  <- read_csv("../../240218_VN_Diabetes_V05/data/published_data/Terrazzano_2020/tr356_df_sum.csv")

In [None]:
mtx_tr356$`...1`  <- NULL

In [None]:
mtx_tr356  <- mtx_tr356  %>% column_to_rownames("SYMBOL")

In [None]:
mtx_tr356

## Annotation of Tr3-56 cells

In [None]:
library(SingleR)

In [None]:
# First SingleR run: annotate every CD8 cell (raw RNA counts) against the
# complete reference table, with Wilcoxon-based marker detection and without
# fine-tuning.
# NOTE(review): `mtx_tr356` is only coerced to numeric in a later cell, and
# `pred` is overwritten by the second SingleR call further down, so this result
# does not appear to be used downstream -- confirm before relying on it.
pred <- SingleR(test = cd8_l1_full_filt@assays$RNA@counts, 
                ref=mtx_tr356, labels=metadata$cell_type, de.method="wilcox",fine.tune = F,num.threads = 4)

In [None]:
mtx_tr356

In [None]:
# Coerce every column of the reference table to numeric.
# `mutate(across(...))` replaces the superseded `mutate_all()`.
mtx_tr356 <- mtx_tr356 %>%
  mutate(across(everything(), as.numeric))

In [None]:
mtx_tr356

In [None]:
metadata$cell_type

Create a reference without the CD3+CD56- population, as it is too broad for our purpose and overlaps with the CD8+ population.

In [None]:
# Reduced SingleR reference: keep samples 1-6 and 10-12 (samples 7-9 are
# dropped) for both the expression columns and their labels.
ref_tr3_56 <- list(
  matrix = mtx_tr356[, c(1:6, 10:12)],
  labels = metadata$cell_type[c(1:6, 10:12)]
)


In [None]:
mtx_tr356

In [None]:
ref_tr3_56$labels

In [None]:
ref_tr3_56$matrix  <- as.matrix(ref_tr3_56$matrix)

In [None]:
# Annotate all CD8 cells (raw RNA counts) against the reduced reference,
# with fine-tuning enabled and four threads.
pred <- SingleR(
  test = cd8_l1_full_filt@assays$RNA@counts,
  ref = ref_tr3_56$matrix,
  labels = ref_tr3_56$labels,
  fine.tune = TRUE,
  num.threads = 4
)

Add labels to the Seurat object. 

In [None]:
cd8_l1_full_filt$singler  <- pred$labels

In [None]:
library(Seurat)

In [None]:
# Embedding coloured by the level-2 annotation.
options(repr.plot.width = 8, repr.plot.height = 4.5)
DimPlot(
  cd8_l1_full_filt,
  group.by = "annotations_l2"
)

In [None]:
# Embedding coloured by the SingleR label; cells are drawn in shuffled order.
options(repr.plot.width = 6, repr.plot.height = 4.5)
DimPlot(
  cd8_l1_full_filt,
  group.by = "singler",
  shuffle = TRUE,
  cols = c("darkseagreen1", "skyblue1", "red")
)

In [None]:
pred$scores

## Quantification of Tr3-56 score in cell types

In [None]:
# Per-cell table pairing the level-2 annotation with the SingleR label.
test <- data.frame(
  annotation = cd8_l1_full_filt$annotations_l2,
  pred = cd8_l1_full_filt$singler
)

In [None]:
# Count cells per annotation x predicted label and turn the counts into
# within-annotation frequencies. `.groups = "drop_last"` spells out the
# grouping that `summarise()` would otherwise keep implicitly (with a message);
# the final `ungroup()` stops the grouping from leaking into later steps.
df2 <- test %>%
  group_by(annotation, pred) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

In [None]:
df2

In [None]:
# Shared ggplot2 theme additions: 20 pt text everywhere (black for the base
# text element) and 10 pt legend keys. Returns a theme object to add to a plot.
ggtheme <- function() {
  font_size <- 20
  theme(
    axis.text = element_text(size = font_size),
    axis.title = element_text(size = font_size),
    text = element_text(size = font_size, colour = "black"),
    legend.text = element_text(size = font_size),
    legend.key.size = unit(10, units = "points")
  )
}


In [None]:
# Horizontal bars: share of each SingleR label within every annotation.
options(repr.plot.width = 16, repr.plot.height = 4.5)
ggplot(test, aes(x = annotation, fill = factor(pred))) +
  geom_bar(position = "fill") +
  theme_classic() +
  ggtheme() +
  coord_flip() +
  scale_fill_manual(values = c("#d6ebd2ff", "lightskyblue1", "red2"))
#scale_fill_manual(values = c("grey","#74bc68ff", "dodgerblue3", "red2"))

In [None]:
pred$scores

In [None]:
# Store the per-cell SingleR score for the TR3-56 label. The column is picked
# by label name (the same string used to filter `singler` elsewhere in this
# notebook) instead of by position, so the assignment stays correct if the set
# or order of reference labels changes.
cd8_l1_full_filt$score_tr356 <- pred$scores[, "cell subset: TR3-56"]

We can see that the highest Tr3-56 score is found in Tgd cells and MAIT cells. 

In [None]:
# TR3-56 score per level-2 annotation, violins only (no points).
options(repr.plot.width = 14, repr.plot.height = 6)
VlnPlot(
  cd8_l1_full_filt,
  features = "score_tr356",
  group.by = "annotations_l2",
  pt.size = 0
)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 6)
# TR3-56 score per level-2 annotation. The annotation label has "---" turned
# into a line break and the lineage prefixes removed; the red bar is the mean.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Annotation = gsub("---", "\n", cd8_l1_full_filt$annotations_l2)
) %>%
  mutate(
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation)
  ) %>%
  ggplot(aes(x = fct_reorder(Annotation, Score), y = Score)) +
  geom_violin() +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("")

Group nonNaive cells together. 

In [None]:
# TR3-56 score per group, with Tem/Tcm/Temra/Proliferating merged into
# "NonNaive"; the red bar is the mean.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2)
) %>%
  mutate(
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  ) %>%
  ggplot(aes(x = fct_reorder(annot2, Score), y = Score)) +
  geom_violin() +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("")
# ggsave("../figures/cd356_score.svg", width = 4, height = 3.5)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4.5)

FeaturePlot(cd8_l1_full_filt, features = "score_tr356", min.cutoff = 0.2, max.cutoff = 0.32)

In [None]:
FeaturePlot(cd8_l1_full_filt, features = "NCAM1", min.cutoff = 0.2)

### Score per patient

In [None]:
options(repr.plot.width = 6, repr.plot.height = 8)

# Cell-level TR3-56 score per Condition with pairwise comparisons between
# the first three Condition levels; the red bar is the mean.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2),
  Patient_ID = cd8_l1_full_filt$Patient_ID,
  Patient_Time = cd8_l1_full_filt$Patient_Time,
  Disease = cd8_l1_full_filt$Disease,
  Condition = cd8_l1_full_filt$Condition
) %>%
  mutate(
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  ) %>%
  ggplot(aes(x = Condition, y = Score)) +
  geom_violin() +
  stat_compare_means(comparisons = list(c(1, 2), c(2, 3), c(1, 3))) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("")

In [None]:

# Mean TR3-56 score per Condition x Patient_Time (one point each), compared
# between Condition levels 1 vs 2 and 2 vs 3; the red bar is the mean.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2),
  Patient_ID = cd8_l1_full_filt$Patient_ID,
  Patient_Time = cd8_l1_full_filt$Patient_Time,
  Disease = cd8_l1_full_filt$Disease,
  Condition = cd8_l1_full_filt$Condition
) %>%
  mutate(
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  ) %>%
  group_by(Condition, Patient_Time) %>%
  summarize(Score = mean(Score)) %>%
  ggplot(aes(x = Condition, y = Score)) +
  geom_violin() +
  geom_point() +
  stat_compare_means(comparisons = list(c(1, 2), c(2, 3))) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score All CD8 cells") +
  theme_classic() +
  ggtheme() +
  xlab("")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

# Mean TR3-56 score per Condition x Patient_Time with patient 116 excluded.
# Violins and beeswarm points are filled by Condition; the grey bar is the
# median; Condition levels 2 and 3 are compared with a paired test.
# NOTE(review): the paired test presumably matches observations by row order
# within each Condition -- confirm the rows line up per patient.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2),
  Patient_ID = cd8_l1_full_filt$Patient_ID,
  Patient_Time = cd8_l1_full_filt$Patient_Time,
  Disease = cd8_l1_full_filt$Disease,
  Condition = cd8_l1_full_filt$Condition
) %>%
  dplyr::filter(Patient_ID != 116) %>%
  mutate(
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  ) %>%
  group_by(Condition, Patient_Time) %>%
  summarize(Score = mean(Score)) %>%
  ggplot(aes(x = Condition, y = Score)) +
  geom_violin(aes(fill = Condition), alpha = 0.3) +
  stat_summary(fun = "median", geom = "crossbar", width = 0.75, color = "grey30") +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(
    aes(fill = Condition),
    size = 3, cex = 3, color = "black", method = "center", shape = 21
  ) +
  stat_compare_means(comparisons = list(c(2, 3)), paired = TRUE) +
  ggtitle("TR3-56 score All CD8 cells") +
  theme_classic() +
  ggtheme() +
  xlab("")

In [None]:
ggsave("../figures/subset_characterization/tr3_56_score_in_condition.svg",
       width = 10, height = 9.5, units = "cm")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

# Mean TR3-56 score per Condition x Patient_Time with patient 116 excluded.
# Violins and beeswarm points are filled by Condition; the grey bar is the
# median; Condition levels 2 and 3 are compared with a paired test.
# NOTE(review): the paired test presumably matches observations by row order
# within each Condition -- confirm the rows line up per patient.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2),
  Patient_ID = cd8_l1_full_filt$Patient_ID,
  Patient_Time = cd8_l1_full_filt$Patient_Time,
  Disease = cd8_l1_full_filt$Disease,
  Condition = cd8_l1_full_filt$Condition
) %>%
  dplyr::filter(Patient_ID != 116) %>%
  mutate(
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  ) %>%
  group_by(Condition, Patient_Time) %>%
  summarize(Score = mean(Score)) %>%
  ggplot(aes(x = Condition, y = Score)) +
  geom_violin(aes(fill = Condition), alpha = 0.3) +
  stat_summary(fun = "median", geom = "crossbar", width = 0.75, color = "grey30") +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(
    aes(fill = Condition),
    size = 3, cex = 3, color = "black", method = "center", shape = 21
  ) +
  stat_compare_means(comparisons = list(c(2, 3)), paired = TRUE) +
  ggtitle("TR3-56 score All CD8 cells") +
  theme_classic() +
  ggtheme() +
  xlab("")

In [None]:
# Mean TR3-56 score per Condition x Patient_Time (one point each), compared
# between Condition levels 1 vs 2 and 2 vs 3; the red bar is the mean.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2),
  Patient_ID = cd8_l1_full_filt$Patient_ID,
  Patient_Time = cd8_l1_full_filt$Patient_Time,
  Disease = cd8_l1_full_filt$Disease,
  Condition = cd8_l1_full_filt$Condition
) %>%
  mutate(
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  ) %>%
  group_by(Condition, Patient_Time) %>%
  summarize(Score = mean(Score)) %>%
  ggplot(aes(x = Condition, y = Score)) +
  geom_violin() +
  geom_point() +
  stat_compare_means(comparisons = list(c(1, 2), c(2, 3))) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score All CD8 cells") +
  theme_classic() +
  ggtheme() +
  xlab("")

In [None]:
# TR3-56 score per group, with Tem/Tcm/Temra/Proliferating merged into
# "NonNaive"; the red bar is the mean. The figure is then written to SVG.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2)
) %>%
  mutate(
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  ) %>%
  ggplot(aes(x = fct_reorder(annot2, Score), y = Score)) +
  geom_violin() +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("")
ggsave("../figures/cd356_score.svg", width = 4, height = 3.5)

## Phenotype and DEG of Tr3-56 cells

Next, we will evaluate which cells are the most likely true TR3-56 cells. 

In [None]:
# Add two metadata columns: `Annotation` (level-2 label with "---" and the
# lineage prefixes removed) and `annot2` (same, but with the
# Tem/Tcm/Temra/Proliferating subsets merged into "NonNaive").
cd8_l1_full_filt@meta.data <- cd8_l1_full_filt@meta.data %>%
  mutate(
    Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2),
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  )

In [None]:
# Per-cell table: merged group, manual annotation, SingleR label and
# TR3-56 score.
test <- data.frame(
  annot2 = cd8_l1_full_filt$annot2,
  annotation = cd8_l1_full_filt$annotations_manual,
  pred = cd8_l1_full_filt$singler,
  score = cd8_l1_full_filt$score_tr356
)

In [None]:
options(repr.plot.width = 7, repr.plot.height = 10)
# Cells labelled TR3-56 by SingleR only: score per manual annotation, with
# individual cells jittered on top and the mean as a red bar.
test %>%
  dplyr::filter(pred == "cell subset: TR3-56") %>%
  ggplot(aes(x = fct_reorder(annotation, score), y = score)) +
  geom_violin() +
  geom_jitter(alpha = 0.2) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("") +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.width = 4, repr.plot.height = 6)
# Cells labelled TR3-56 by SingleR only: score per merged group, with
# individual cells jittered on top and the mean as a red bar.
test %>%
  dplyr::filter(pred == "cell subset: TR3-56") %>%
  ggplot(aes(x = fct_reorder(annot2, score), y = score)) +
  geom_violin() +
  geom_jitter(alpha = 0.2) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("") +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.width = 4, repr.plot.height = 6)
# Cells labelled TR3-56 by SingleR only: score per merged group, violins
# without individual points; the red bar is the mean.
test %>%
  dplyr::filter(pred == "cell subset: TR3-56") %>%
  ggplot(aes(x = fct_reorder(annot2, score), y = score)) +
  geom_violin() +
  #geom_jitter(alpha = 0.2) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("") +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
ggsave("../figures/tr356_score_in_tr356.svg", width = 8, height = 12, units = "cm")

# Unique DEG of TR3-56 cells

TR3-56 cells are found in multiple clusters, so let's figure out what makes their signature unique. 

In [None]:
# Cells that SingleR labelled TR3-56 get the label "TR3-56"; every other
# cell keeps its merged `annot2` group.
cd8_l1_full_filt$annot_with_tr356 <- ifelse(
  cd8_l1_full_filt$singler == "cell subset: TR3-56",
  "TR3-56",
  cd8_l1_full_filt$annot2
)

In [None]:
cd8_l1_full_filt$annot_with_tr356  %>% table

What defines TR3-56 cells?

In [None]:
DefaultAssay(cd8_l1_full_filt)  <- "RNA"

In [None]:
Idents(cd8_l1_full_filt)  <- cd8_l1_full_filt$annot_with_tr356

Find markers compared to all other cells. 

In [None]:
# Markers of TR3-56 cells versus all remaining cells. Stored as
# `mrk_tr356_all` -- the name a later cell displays -- so the result is not
# lost when `mrk_tr356` is reassigned to the intersected gene list.
mrk_tr356_all <- FindMarkers(cd8_l1_full_filt, ident.1 = "TR3-56")

Find markers over other gd cells. 

In [None]:
# TR3-56 cells versus the "gd T cells" identity.
mrk_tr356_over_tgd <- FindMarkers(
  cd8_l1_full_filt,
  ident.1 = "TR3-56",
  ident.2 = "gd T cells"
)

In [None]:
mrk_tr356_over_tgd

Find markers over other MAIT cells. 

In [None]:
# TR3-56 cells versus the "MAIT cells" identity.
mrk_tr356_over_mait <- FindMarkers(
  cd8_l1_full_filt,
  ident.1 = "TR3-56",
  ident.2 = "MAIT cells"
)

In [None]:
mrk_tr356_over_mait

Find markers over other nonNaive cells. 

In [None]:
# TR3-56 cells versus the "NonNaive" identity.
mrk_tr356_over_nonNaive <- FindMarkers(
  cd8_l1_full_filt,
  ident.1 = "TR3-56",
  ident.2 = "NonNaive"
)

Create intersection of these markers. 

In [None]:
# Gene names of significantly up-regulated markers (adjusted p < 0.05 and
# positive log2 fold change) in each of the three comparisons.
mrk_tr356_over_tgd_genes <- rownames(
  dplyr::filter(mrk_tr356_over_tgd, p_val_adj < 0.05 & avg_log2FC > 0)
)
mrk_tr356_over_mait_genes <- rownames(
  dplyr::filter(mrk_tr356_over_mait, p_val_adj < 0.05 & avg_log2FC > 0)
)
mrk_tr356_over_nonNaive_genes <- rownames(
  dplyr::filter(mrk_tr356_over_nonNaive, p_val_adj < 0.05 & avg_log2FC > 0)
)


In [None]:
# Genes shared by the first 150 up-regulated markers of all three
# comparisons. `head()` is used instead of `[1:150]` because indexing past the
# end of a shorter vector pads it with NA, and an NA present in all three
# lists would survive the intersection and be passed on as a "gene".
mrk_tr356 <- intersect(
  head(mrk_tr356_over_tgd_genes, 150),
  intersect(
    head(mrk_tr356_over_mait_genes, 150),
    head(mrk_tr356_over_nonNaive_genes, 150)
  )
)

In [None]:
# Average RNA expression of the shared TR3-56 markers per
# `annot_with_tr356` group.
avgexp <- AverageExpression(
  cd8_l1_full_filt,
  features = mrk_tr356,
  return.seurat = FALSE,
  group.by = "annot_with_tr356",
  assay = "RNA"
)

In [None]:
# Heatmap with groups as rows and genes as columns, scaled per gene
# (column) and clustered on both axes.
options(repr.plot.width = 9.5, repr.plot.height = 3.5)
pheatmap::pheatmap(
  t(avgexp$RNA),
  main = "",
  scale = "column",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 12
)

In [None]:
mrk_tr356_all

In [None]:
# Average RNA expression of a hand-picked gene panel per
# `annot_with_tr356` group.
avgexp <- AverageExpression(
  cd8_l1_full_filt,
  features = c(
    "ID1", "ID2", "CD44", "NR4A2", "NR4A3", "RUNX3", "BHLHE40",
    "DUSP1", "DUSP2", "DUSP5", "DUSP8", "CCR6", "ITGB1"
  ),
  return.seurat = FALSE,
  group.by = "annot_with_tr356",
  assay = "RNA"
)

In [None]:
mrk_tr356

In [None]:
FeaturePlot(cd8_l1_full_filt, features = "NR4A2")

In [None]:
# Heatmap with genes as rows and groups as columns, scaled per gene (row)
# and clustered on both axes.
options(repr.plot.width = 4.5, repr.plot.height = 5)
pheatmap::pheatmap(
  avgexp$RNA,
  main = "",
  scale = "row",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 12
)

In [None]:
# Same heatmap settings (row-scaled, clustered on both axes), written to a
# PDF file.
options(repr.plot.width = 4.5, repr.plot.height = 5)
pheatmap::pheatmap(
  avgexp$RNA,
  main = "",
  scale = "row",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 12,
  filename = "../figures/heatmaps/tr356_tgd_hm.pdf",
  width = 4.5,
  height = 5
)

## DEG regulatory signature

Let's see whether TR3-56 cells have a higher Treg signature than other cell types.

In [None]:
# Gene symbols making up the Treg signature; passed as a single gene set to
# AddModuleScore() in the next cell.
# NOTE(review): the provenance of this list is not recorded here -- add a
# citation or the code that produced it.
treg_gene_sig  <- c('S100A4', 'FOXP3', 'ITGB1', 'AHNAK', 'TIGIT', 'ANXA2', 'IL10RA', 
                    'TNFRSF1B', 'GBP5', 'LGALS1', 'RTKN2', 'CTLA4', 'S100A10', 'IL32', 
                    'FCRL3', 'IL2RA', 'CLIC1', 'KLF6', 'ANXA5', 'SYNE2', 'S100A11', 
                    'CD74', 'CRIP1', 'EZR', 'NIBAN1', 'FLNA', 'NCF4', 'PRDM1', 'LGALS3', 
                    'CAPN2', 'ARID5B', 'SH3BGRL3', 'CST7', 'ISG20', 'MYO1F', 'LMNA', 
                    'HLA-DRB5', 'TENT5C', 'GAPDH', 'MTHFD2', 'FANK1', 'HLA-DQA1', 
                    'IL2RB', 'IKZF2', 'SRGN', 'STAM', 'CLDND1', 'DUSP4', 'BIRC3', 
                    'SAT1', 'PBXIP1', 'HLA-DRB1', 'HLA-DPA1', 'TNFRSF4', 'SHMT2', 
                    'TAGLN2', 'PLP2', 'ACTN4', 'DUSP1', 'RORA', 'CD99', 'GLCCI1', 
                    'CARD16', 'PTTG1', 'TSPAN5', 'TAP1', 'OPTN', 'EIF3A', 'ELOVL5', 
                    'LSR', 'GSTK1', 'ZFP36', 'TIFA', 'BATF', 'EMP3', 'TSC22D3', 'OGDH', 
                    'HLA-DPB1', 'CCDC50', 'LIMS1', 'RAB11FIP1', 'TRAC', 'CD84', 'CAST', 
                    'PYHIN1', 'JPT1', 'MPST', 'SAMSN1', 'ZC2HC1A', 'PMAIP1', 'S100A6', 
                    'PI16', 'OAS1', 'PPP1R18', 'NCR3', 'CD58', 'DOK2', 'BCL2L11', 
                    'SMAD3', 'GBP2', 'SYT11', 'PPP2R5C', 'PPP1R15A', 'RGS1', 'RAB37', 
                    'REEP5', 'IKZF3', 'RNF214', 'IRF1', 'ANTKMT', 'PRDX1', 'IQGAP2', 'MT2A', 
                    'TNFRSF18', 'FAS', 'DUSP2', 'CCDC167', 'HLA-DRA', 'PELI1', 'JUNB', 'LGALS9', 
                    'ZBTB38', 'SPTAN1', 'SMS', 'PARP1', 'MCL1', 'DYNLL1', 'HERC5', 'CDC25B', 
                    'SLC9A3R1', 'MYO1G', 'TPR', 'JUN', 'ID3', 'TPI1', 'RILPL2', 'CCR6', 
                    'YWHAH', 'PTGER2', 'HNRNPLL', 'PREX1', 'PSMB9', 'MYH9', 'CORO1B', 'SLAMF1', 
                    'SIT1', 'NPDC1', 'PHACTR2', 'ST8SIA6', 'ATP2B4', 'IL18R1', 'TRIM22', 'HLA-DQB1', 
                    'F5', 'TBC1D4', 'MAF', 'ATP2B1', 'C4orf48', 'GALM', 'C12orf75', 'CPA5', 
                    'PPP1CA', 'S1PR4', 'PDE4DIP', 'GATA3', 'GLIPR2', 'CHST7', 'CXCR4', 'H1-4', 
                    'GADD45B', 'RESF1', 'IER2', 'ISG15', 'CDC42EP3', 'PCBD1', 'LYST', 'TPM4', 
                    'TAB2', 'NINJ2', 'ALOX5AP', 'CCR4', 'FCER1G', 'MAP3K1', 'CXCR3', 'CD59', 
                    'SLFN5', 'CCNG2', 'ITGA4', 'SESN1', 'SPATS2L', 'HPGD', 'EFHD2', 'LIMA1', 
                    'BCL2', 'RABGAP1L', 'TOX', 'SAMD9', 'TXN', 'IFI16', 'IDS', 'TRIB2', 'CDHR3', 
                    'PALM2AKAP2', 'ICA1', 'LPAR6', 'KLRB1', 'SMC6', 'ITM2C', 'CEACAM4', 'PRF1', 
                    'CD63', 'AHR', 'IQGAP1', 'GADD45A', 'ADAM8', 'GLIPR1', 'VAV3', 'EPSTI1', 
                    'GPRIN3', 'POU2F2', 'SH2D2A', 'DENND10', 'MAP4', 'CCR10', 'PTPN18', 'IER5', 
                    'GPR183', 'LAIR2', 'CTSC', 'TTN', 'NR4A2', 'SESN3', 'CPNE2', 'MX1')


In [None]:
# Score every cell for the Treg gene set on the RNA assay; the result is
# stored in the metadata column `treg_gene_sig1`.
cd8_l1_full_filt <- AddModuleScore(
  cd8_l1_full_filt,
  features = list(treg_gene_sig),
  search = FALSE,
  ctrl = 50,
  nbin = 50,
  assay = "RNA",
  name = "treg_gene_sig"
)

In [None]:
FeaturePlot(cd8_l1_full_filt, features = "treg_gene_sig1")

In [None]:
VlnPlot(cd8_l1_full_filt, features = "treg_gene_sig1", group.by = "annot_with_tr356", pt.size = 0)

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
# Treg module score per `annot_with_tr356` group; the red bar is the mean.
# The title now names the plotted quantity (it previously read
# "TR3-56 score", carried over from the TR3-56 plots).
# NOTE(review): the comparisons are given as x-axis positions (5 vs 6, 4 vs 6),
# which depend on the score-based ordering of the groups -- confirm they still
# point at the intended groups when the data change.
cd8_l1_full_filt@meta.data %>%
  dplyr::select(treg_gene_sig1, annot_with_tr356) %>%
  ggplot(aes(x = fct_reorder(annot_with_tr356, treg_gene_sig1), y = treg_gene_sig1)) +
  geom_violin() +
  #geom_jitter(alpha = 0.2) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("Treg signature score") +
  theme_classic() +
  ggtheme() +
  xlab("") +
  theme(axis.text.x = element_text(angle = 90)) +
  ggpubr::stat_compare_means(comparisons = list(c(5, 6), c(4, 6)))

In [None]:
ggsave("../figures/treg_score_in_tr356.svg", width = 9, height = 12, units = "cm")

### DEG parent clusters

Now let's see what the differences are between TR3-56 cells within the MAIT, gd and CD8+ clusters.

In [None]:
tr356  <- subset(cd8_l1_full_filt, annot_with_tr356 == "TR3-56" )

In [None]:
tr356  <- subset(tr356, annot2 != "NK cells" )

In [None]:
Idents(tr356)  <- tr356$annot2

In [None]:
tr356$annot2  %>% table

In [None]:
tr356$annot2  <- ifelse(tr356$annot2 == "NonNaive", "Conventional_CD8",tr356$annot2)

In [None]:
Idents(tr356)  <- tr356$annot2

In [None]:
mrk_tr356_in_cluster  <- FindAllMarkers(tr356, only.pos = TRUE)

In [None]:
mrk_tr356_in_cluster

In [None]:
# First seven significant (adjusted p < 0.05) markers per parent cluster.
# A gene can be among the first markers of more than one cluster, so the list
# is de-duplicated before it is used as a feature set; the grouping is
# dropped explicitly once the per-cluster slice is done.
mrk_tr356_in_cluster_genes <- mrk_tr356_in_cluster %>%
  dplyr::filter(p_val_adj < 0.05) %>%
  group_by(cluster) %>%
  slice_head(n = 7) %>%
  ungroup() %>%
  pull(gene) %>%
  unique()

In [None]:
# Average RNA expression of the per-cluster markers within TR3-56 cells,
# grouped by parent cluster (`annot2`).
avgexp <- AverageExpression(
  tr356,
  features = mrk_tr356_in_cluster_genes,
  return.seurat = FALSE,
  group.by = "annot2",
  assay = "RNA"
)

In [None]:
# Heatmap with parent clusters as rows and genes as columns, scaled per gene
# (column) and clustered on both axes.
options(repr.plot.width = 9.5, repr.plot.height = 3.5)
pheatmap::pheatmap(
  t(avgexp$RNA),
  main = "",
  scale = "column",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 12
)

## Gene expression in Unconventional subclusters

Create annotations of groups - merge nonNaive cell subclusters together. 

In [None]:
# (Re)build two metadata columns: `Annotation` (level-2 label with "---" and
# the lineage prefixes removed) and `annot2` (same, but with the
# Tem/Tcm/Temra/Proliferating subsets merged into "NonNaive").
cd8_l1_full_filt@meta.data <- cd8_l1_full_filt@meta.data %>%
  mutate(
    Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2),
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  )

In [None]:
cd8_l1_full_filt$annotations_l2_sample  <- paste(cd8_l1_full_filt$annot2, cd8_l1_full_filt$Sample_ID)

In [None]:
cd8_l1_full_filt$annotations_l2_sample   %>% table

Use a helper function that quantifies the percentage of cells with non-zero expression of a selected gene.

In [None]:
#' Plot the per-sample fraction of cells expressing a gene
#'
#' For every sample x group combination, computes the fraction of cells with a
#' non-zero raw RNA count for `gene`. Combinations with no cells are filled
#' with 0. Draws one violin per group, one jittered point per sample and a
#' crossbar at the group mean; groups are ordered by their median fraction.
#'
#' Note that the y axis is labelled "Pct expressing cells" but the plotted
#' values are fractions between 0 and 1.
#'
#' @param seurat_object Seurat object whose `RNA` assay has a `counts` matrix.
#' @param gene A single gene symbol; must be a row name of the RNA assay.
#' @param group.by Name of the metadata column that defines the x-axis groups.
#' @param sample.col Name of the metadata column that identifies samples.
#' @return A ggplot object.
pct_expressing_boxplot <- function(seurat_object, gene, group.by = "annotations_l2", sample.col = "sample") {
  stopifnot(is.character(gene), length(gene) == 1L)

  # Fail early with a readable message instead of a downstream data.frame error.
  meta <- seurat_object@meta.data
  missing_cols <- setdiff(c(group.by, sample.col), colnames(meta))
  if (length(missing_cols) > 0) {
    stop(
      "Metadata column(s) not found: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  rn <- match(gene, rownames(seurat_object@assays$RNA))
  if (is.na(rn)) {
    stop("Gene '", gene, "' not found in the RNA assay.", call. = FALSE)
  }

  # Theme settings are defined locally so the function does not depend on a
  # global helper being present.
  local_theme <- theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 20),
    text = element_text(size = 20, colour = "black"),
    legend.text = element_text(size = 20),
    legend.key.size = unit(10, units = "points")
  )

  # Fraction of expressing cells per sample x group. The wide/long round trip
  # adds the sample x group combinations that have no cells, as 0.
  df <- data.frame(
    grouping_var = meta[[group.by]],
    value = seurat_object@assays$RNA@counts[rn, ],
    sample = meta[[sample.col]]
  ) %>%
    mutate(expressing = if_else(value > 0, 1, 0)) %>%
    dplyr::select(-value) %>%
    group_by(sample, grouping_var) %>%
    summarise(mean_expression = mean(expressing), .groups = "drop") %>%
    pivot_wider(names_from = sample, values_from = mean_expression, values_fill = 0) %>%
    pivot_longer(!grouping_var, names_to = "sample", values_to = "expressing")

  plt <- ggplot(
    data = df,
    aes(x = reorder(grouping_var, expressing, FUN = median), y = expressing)
  ) +
    geom_violin(aes(fill = grouping_var), alpha = 0.3, scale = "width") +
    # Zero-size dot layer kept from the original; it draws nothing visible.
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_jitter(width = 0.1, height = 0.0, size = 2, aes(color = grouping_var)) +
    stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "grey30") +
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
    local_theme +
    ggtitle(gene) +
    ylab("Pct expressing cells") +
    xlab("") +
    NoLegend()

  plt
}


In [None]:
options(repr.plot.width=4, repr.plot.height=6)

pct_expressing_boxplot(seurat_object = cd8_l1_full_filt, group.by = "annot2", gene = "NCAM1", 
                       sample.col = "Sample_ID")

In [None]:
options(repr.plot.width=4, repr.plot.height=6)
pct_expressing_boxplot(seurat_object = cd8_l1_full_filt, group.by = "annot2", gene = "FOXP3", 
                       sample.col = "Sample_ID")

In [None]:
cd8_l1_full_filt_sub  <- subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16","Exp18","Exp19","Exp20"))

In [None]:
library(patchwork)

In [None]:
options(repr.plot.width=16, repr.plot.height=12)
# One panel per gene, combined left to right into a single four-column row.
Reduce(
  `+`,
  lapply(
    c("TGFB1", "TIGIT", "ITGB1", "TNFRSF4"),
    function(panel_gene) {
      pct_expressing_boxplot(
        seurat_object = cd8_l1_full_filt_sub,
        group.by = "annotations_l2",
        gene = panel_gene,
        sample.col = "Sample_ID"
      )
    }
  )
) +
  plot_layout(ncol = 4)

In [None]:
# One panel per gene, combined left to right into a single four-column row.
Reduce(
  `+`,
  lapply(
    c("IL2RA", "IL2RB", "IL10RA", "CCR10"),
    function(panel_gene) {
      pct_expressing_boxplot(
        seurat_object = cd8_l1_full_filt_sub,
        group.by = "annotations_l2",
        gene = panel_gene,
        sample.col = "Sample_ID"
      )
    }
  )
) +
  plot_layout(ncol = 4)

In [None]:
options(repr.plot.width=16, repr.plot.height=12)
# Four-panel row for CTLA4, TIGIT, IL10RA and IL10 on the experiment subset.
# Fixes relative to the previous version of this cell, which did not parse:
#   * `subset(, ...)` had no object; the already-built `cd8_l1_full_filt_sub`
#     (same Experiment_ID filter) is used instead;
#   * stray `)` after `cd8_l1_full_filt_sub` closed the calls too early;
#   * the TIGIT panel used the annotation column as the sample column, which
#     collapses every group to a single point -- it now uses "Sample_ID" like
#     the other panels.
pct_expressing_boxplot(
  seurat_object = cd8_l1_full_filt_sub,
  group.by = "annotations_l2", gene = "CTLA4",
  sample.col = "Sample_ID"
) +
  pct_expressing_boxplot(
    seurat_object = cd8_l1_full_filt_sub,
    group.by = "annotations_l2", gene = "TIGIT",
    sample.col = "Sample_ID"
  ) +
  pct_expressing_boxplot(
    seurat_object = cd8_l1_full_filt_sub,
    group.by = "annotations_l2", gene = "IL10RA",
    sample.col = "Sample_ID"
  ) +
  pct_expressing_boxplot(
    seurat_object = cd8_l1_full_filt_sub,
    group.by = "annotations_l2", gene = "IL10",
    sample.col = "Sample_ID"
  ) +
  plot_layout(ncol = 4)

In [None]:
options(repr.plot.width=16, repr.plot.height=12)
# Four-panel row for TNFRSF18, CCR4, CCR6 and TNFRSF14 on the experiment
# subset. The CCR4 panel previously used the annotation column as the sample
# column, which collapses every group to a single point; it now uses
# "Sample_ID" like the other panels.
pct_expressing_boxplot(
  seurat_object = cd8_l1_full_filt_sub,
  group.by = "annotations_l2", gene = "TNFRSF18",
  sample.col = "Sample_ID"
) +
  pct_expressing_boxplot(
    seurat_object = cd8_l1_full_filt_sub,
    group.by = "annotations_l2", gene = "CCR4",
    sample.col = "Sample_ID"
  ) +
  pct_expressing_boxplot(
    seurat_object = cd8_l1_full_filt_sub,
    group.by = "annotations_l2", gene = "CCR6",
    sample.col = "Sample_ID"
  ) +
  pct_expressing_boxplot(
    seurat_object = cd8_l1_full_filt_sub,
    group.by = "annotations_l2", gene = "TNFRSF14",
    sample.col = "Sample_ID"
  ) +
  plot_layout(ncol = 4)

In [None]:
options(repr.plot.width=16, repr.plot.height=12)
# Four-panel row for CTLA4, TIGIT, IL10RA and IL10 on the experiment subset.
# The TIGIT panel previously used the annotation column as the sample column,
# which collapses every group to a single point; it now uses "Sample_ID" like
# the other panels.
pct_expressing_boxplot(
  seurat_object = cd8_l1_full_filt_sub,
  group.by = "annotations_l2", gene = "CTLA4",
  sample.col = "Sample_ID"
) +
  pct_expressing_boxplot(
    seurat_object = cd8_l1_full_filt_sub,
    group.by = "annotations_l2", gene = "TIGIT",
    sample.col = "Sample_ID"
  ) +
  pct_expressing_boxplot(
    seurat_object = cd8_l1_full_filt_sub,
    group.by = "annotations_l2", gene = "IL10RA",
    sample.col = "Sample_ID"
  ) +
  pct_expressing_boxplot(
    seurat_object = cd8_l1_full_filt_sub,
    group.by = "annotations_l2", gene = "IL10",
    sample.col = "Sample_ID"
  ) +
  plot_layout(ncol = 4)