# Part 9: Analysis of Unconventional T cells

In this document, we focus on the Tgd and unconventional T cell clusters that were subset from the main CD8 dataset. To reproduce the analysis, please download the Tgd Seurat object available at Zenodo: [https://doi.org/10.5281/zenodo.14222418](https://doi.org/10.5281/zenodo.14222418)

In [None]:
# List the objects currently present in the global environment.
ls()

In [None]:
# Put the user-level R 4.4 library on the package search path.
# NOTE(review): this path is machine-specific; adjust it when running elsewhere.
.libPaths("~/R/x86_64-pc-linux-gnu-library/4.4/")
library(GEOquery)
# Project helper script.
# NOTE(review): presumably attaches the remaining packages used below (Seurat,
# tidyverse, SingleR, patchwork, ...) and defines helpers such as ggtheme() and
# create_df4() -- confirm against that file.
source("diabetes_analysis_v07.R")

In [None]:
# Select the "multisession" plan.
# NOTE(review): presumably future::plan -- confirm that `future` is attached by
# the sourced script.
plan("multisession")

In [None]:
## Load data

In [None]:
## CD8 All
# Full CD8 object; later cells read its `annotations_l1` / `annotations_l2`
# metadata columns and its RNA assay.
cd8_l1_full_filt  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# Display the object.
cd8_l1_full_filt

In [None]:
## CD8 Tgd
# Object used in the "gd CD8" section at the end of the notebook.
gd  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L2/cd8_l2_unc.rds")

# DEG and gene signatures Unconventional Tgd vs CD8 Naive


In [None]:
options(repr.plot.width = 8, repr.plot.height = 5)
# Embedding of all CD8 cells coloured by the level-2 annotation.
DimPlot(cd8_l1_full_filt, group.by = "annotations_l2")

In [None]:
# Use the level-2 annotation as the active identity so that the
# ident.1 / ident.2 arguments below can refer to annotation labels.
Idents(cd8_l1_full_filt)  <- cd8_l1_full_filt$annotations_l2

In [None]:
# Number of cells per level-2 annotation.
table(cd8_l1_full_filt$annotations_l2)

In [None]:
# Differential expression: gd T cells (ident.1) vs naive CD8 T cells (ident.2).
# NOTE(review): this runs before the default assay is switched to "RNA" in the
# next cell, and `mrk` is not used again before it is overwritten further down.
mrk  <- FindMarkers(cd8_l1_full_filt,
                   `ident.1` = "CD8 Unconventional T cells---gd T cells",
                   `ident.2` = "CD8 T cells---Naive")

In [None]:
DefaultAssay(cd8_l1_full_filt)  <- "RNA"

In [None]:
# Fold changes for the same contrast; this table is turned into the ranked
# gene list used for GSEA below.
fc  <- FoldChange(cd8_l1_full_filt,
                   `ident.1` = "CD8 Unconventional T cells---gd T cells",
                   `ident.2` = "CD8 T cells---Naive")

In [None]:
# Move the gene symbols from the row names into a `gene` column *before*
# sorting, so they cannot be lost by a dplyr verb that drops row names, then
# order the genes by ascending log2 fold change.
fc  <- fc  %>% rownames_to_column("gene")  %>% arrange(avg_log2FC)

In [None]:
# Treg gene signature (gene symbols) used below as the gene set for GSEA and
# for the per-cell module score.
treg_gene_sig  <- c('S100A4', 'FOXP3', 'ITGB1', 'AHNAK', 'TIGIT', 'ANXA2', 'IL10RA', 
                    'TNFRSF1B', 'GBP5', 'LGALS1', 'RTKN2', 'CTLA4', 'S100A10', 'IL32', 
                    'FCRL3', 'IL2RA', 'CLIC1', 'KLF6', 'ANXA5', 'SYNE2', 'S100A11', 
                    'CD74', 'CRIP1', 'EZR', 'NIBAN1', 'FLNA', 'NCF4', 'PRDM1', 'LGALS3', 
                    'CAPN2', 'ARID5B', 'SH3BGRL3', 'CST7', 'ISG20', 'MYO1F', 'LMNA', 
                    'HLA-DRB5', 'TENT5C', 'GAPDH', 'MTHFD2', 'FANK1', 'HLA-DQA1', 
                    'IL2RB', 'IKZF2', 'SRGN', 'STAM', 'CLDND1', 'DUSP4', 'BIRC3', 
                    'SAT1', 'PBXIP1', 'HLA-DRB1', 'HLA-DPA1', 'TNFRSF4', 'SHMT2', 
                    'TAGLN2', 'PLP2', 'ACTN4', 'DUSP1', 'RORA', 'CD99', 'GLCCI1', 
                    'CARD16', 'PTTG1', 'TSPAN5', 'TAP1', 'OPTN', 'EIF3A', 'ELOVL5', 
                    'LSR', 'GSTK1', 'ZFP36', 'TIFA', 'BATF', 'EMP3', 'TSC22D3', 'OGDH', 
                    'HLA-DPB1', 'CCDC50', 'LIMS1', 'RAB11FIP1', 'TRAC', 'CD84', 'CAST', 
                    'PYHIN1', 'JPT1', 'MPST', 'SAMSN1', 'ZC2HC1A', 'PMAIP1', 'S100A6', 
                    'PI16', 'OAS1', 'PPP1R18', 'NCR3', 'CD58', 'DOK2', 'BCL2L11', 
                    'SMAD3', 'GBP2', 'SYT11', 'PPP2R5C', 'PPP1R15A', 'RGS1', 'RAB37', 
                    'REEP5', 'IKZF3', 'RNF214', 'IRF1', 'ANTKMT', 'PRDX1', 'IQGAP2', 'MT2A', 
                    'TNFRSF18', 'FAS', 'DUSP2', 'CCDC167', 'HLA-DRA', 'PELI1', 'JUNB', 'LGALS9', 
                    'ZBTB38', 'SPTAN1', 'SMS', 'PARP1', 'MCL1', 'DYNLL1', 'HERC5', 'CDC25B', 
                    'SLC9A3R1', 'MYO1G', 'TPR', 'JUN', 'ID3', 'TPI1', 'RILPL2', 'CCR6', 
                    'YWHAH', 'PTGER2', 'HNRNPLL', 'PREX1', 'PSMB9', 'MYH9', 'CORO1B', 'SLAMF1', 
                    'SIT1', 'NPDC1', 'PHACTR2', 'ST8SIA6', 'ATP2B4', 'IL18R1', 'TRIM22', 'HLA-DQB1', 
                    'F5', 'TBC1D4', 'MAF', 'ATP2B1', 'C4orf48', 'GALM', 'C12orf75', 'CPA5', 
                    'PPP1CA', 'S1PR4', 'PDE4DIP', 'GATA3', 'GLIPR2', 'CHST7', 'CXCR4', 'H1-4', 
                    'GADD45B', 'RESF1', 'IER2', 'ISG15', 'CDC42EP3', 'PCBD1', 'LYST', 'TPM4', 
                    'TAB2', 'NINJ2', 'ALOX5AP', 'CCR4', 'FCER1G', 'MAP3K1', 'CXCR3', 'CD59', 
                    'SLFN5', 'CCNG2', 'ITGA4', 'SESN1', 'SPATS2L', 'HPGD', 'EFHD2', 'LIMA1', 
                    'BCL2', 'RABGAP1L', 'TOX', 'SAMD9', 'TXN', 'IFI16', 'IDS', 'TRIB2', 'CDHR3', 
                    'PALM2AKAP2', 'ICA1', 'LPAR6', 'KLRB1', 'SMC6', 'ITM2C', 'CEACAM4', 'PRF1', 
                    'CD63', 'AHR', 'IQGAP1', 'GADD45A', 'ADAM8', 'GLIPR1', 'VAV3', 'EPSTI1', 
                    'GPRIN3', 'POU2F2', 'SH2D2A', 'DENND10', 'MAP4', 'CCR10', 'PTPN18', 'IER5', 
                    'GPR183', 'LAIR2', 'CTSC', 'TTN', 'NR4A2', 'SESN3', 'CPNE2', 'MX1')


In [None]:
# Build the GSEA ranking: a named numeric vector of log2 fold changes whose
# names are the gene symbols.
fc.df <- dplyr::select(fc, gene, avg_log2FC)
ranks <- deframe(fc.df)

In [None]:
library(fgsea)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)

# Running enrichment curve of the Treg signature along the ranked genes.
plotEnrichment(pathway = treg_gene_sig, stats = ranks) +
  labs(title = "treg_gene_sig in Tgd")


In [None]:
# Enrichment statistics for the single Treg gene set.
fg <- fgsea(
  pathways = list(treg_gene_sig = treg_gene_sig),
  stats = ranks
)

In [None]:
# Genes driving the enrichment signal.
fg$leadingEdge

In [None]:
# Per-cell module score for the Treg signature.
# AddModuleScore() expects `features` to be a *list* of gene vectors, one list
# element per signature. Passing the bare character vector makes every gene its
# own one-gene "module" (treg_gene_sig1, treg_gene_sig2, ...), so the column
# `treg_gene_sig1` used below would hold the score of the first gene only.
# Wrapping the vector in list() yields a single score for the whole signature,
# stored in `treg_gene_sig1`.
cd8_l1_full_filt <- AddModuleScore(
  cd8_l1_full_filt,
  features = list(treg_gene_sig),
  search = FALSE,
  ctrl = 50,
  nbin = 50,
  assay = "RNA",
  name = "treg_gene_sig"
)

In [None]:
# Print the per-cell Treg module score stored in the metadata.
cd8_l1_full_filt$treg_gene_sig1

In [None]:
options(repr.plot.width = 10, repr.plot.height = 8)

# Score distribution per active identity; pt.size = 0 hides individual cells.
VlnPlot(cd8_l1_full_filt, features = "treg_gene_sig1", pt.size = 0)

In [None]:
# Violin plot of the Treg module score per cell group, ordered by the group
# median, with a red bar at the median.
# The grouping column `annot2` is only added to the object's metadata in a
# later cell of this notebook, so it is derived here from `annotations_l2` in
# the same way (prefixes stripped, Tem/Tcm/Temra/Proliferating pooled as
# "NonNaive"). This lets the cell run when the notebook is executed top to
# bottom.
cd8_l1_full_filt@meta.data  %>% 
  mutate(Annotation = gsub(annotations_l2, pattern = "---", replacement = ""))  %>% 
  mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
  mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
  mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
  mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                            TRUE ~ Annotation))  %>% 
  ggplot(aes(x = fct_reorder(annot2, treg_gene_sig1), y = treg_gene_sig1)) +
  geom_violin() + 
  stat_summary(fun = "median",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("Treg score") + theme_classic() + ggtheme() + xlab("")
# ggsave("../figures/treg_score.svg", width = 4, height = 3.5)

In [None]:
# Switch the active identity to the level-1 annotation for the next contrast.
Idents(cd8_l1_full_filt)  <- cd8_l1_full_filt$annotations_l1

# Number of cells per level-1 annotation.
table(cd8_l1_full_filt$annotations_l1)

In [None]:
# Differential expression: conventional CD8 T cells (ident.1) vs
# unconventional T cells (ident.2).
# NOTE(review): `mrk` is not used in any later cell visible in this notebook.
mrk  <- FindMarkers(cd8_l1_full_filt,
                   `ident.1` = "CD8 T cells",
                   `ident.2` = "CD8 Unconventional T cells")

In [None]:
DefaultAssay(cd8_l1_full_filt)  <- "RNA"

In [None]:
# Fold changes for the same contrast; input of the ranked gene list below.
fc  <- FoldChange(cd8_l1_full_filt,
                   `ident.1` = "CD8 T cells",
                   `ident.2` = "CD8 Unconventional T cells")

In [None]:
# Move the gene symbols from the row names into a `gene` column *before*
# sorting, so they cannot be lost by a dplyr verb that drops row names, then
# order the genes by ascending log2 fold change.
fc  <- fc  %>% rownames_to_column("gene")  %>% arrange(avg_log2FC)

In [None]:
# Treg gene signature (gene symbols), re-assigned here before the second GSEA.
# NOTE(review): this repeats the earlier `treg_gene_sig` assignment in this
# notebook; keep the two lists in sync or define the signature only once.
treg_gene_sig  <- c('S100A4', 'FOXP3', 'ITGB1', 'AHNAK', 'TIGIT', 'ANXA2', 'IL10RA', 
                    'TNFRSF1B', 'GBP5', 'LGALS1', 'RTKN2', 'CTLA4', 'S100A10', 'IL32', 
                    'FCRL3', 'IL2RA', 'CLIC1', 'KLF6', 'ANXA5', 'SYNE2', 'S100A11', 
                    'CD74', 'CRIP1', 'EZR', 'NIBAN1', 'FLNA', 'NCF4', 'PRDM1', 'LGALS3', 
                    'CAPN2', 'ARID5B', 'SH3BGRL3', 'CST7', 'ISG20', 'MYO1F', 'LMNA', 
                    'HLA-DRB5', 'TENT5C', 'GAPDH', 'MTHFD2', 'FANK1', 'HLA-DQA1', 
                    'IL2RB', 'IKZF2', 'SRGN', 'STAM', 'CLDND1', 'DUSP4', 'BIRC3', 
                    'SAT1', 'PBXIP1', 'HLA-DRB1', 'HLA-DPA1', 'TNFRSF4', 'SHMT2', 
                    'TAGLN2', 'PLP2', 'ACTN4', 'DUSP1', 'RORA', 'CD99', 'GLCCI1', 
                    'CARD16', 'PTTG1', 'TSPAN5', 'TAP1', 'OPTN', 'EIF3A', 'ELOVL5', 
                    'LSR', 'GSTK1', 'ZFP36', 'TIFA', 'BATF', 'EMP3', 'TSC22D3', 'OGDH', 
                    'HLA-DPB1', 'CCDC50', 'LIMS1', 'RAB11FIP1', 'TRAC', 'CD84', 'CAST', 
                    'PYHIN1', 'JPT1', 'MPST', 'SAMSN1', 'ZC2HC1A', 'PMAIP1', 'S100A6', 
                    'PI16', 'OAS1', 'PPP1R18', 'NCR3', 'CD58', 'DOK2', 'BCL2L11', 
                    'SMAD3', 'GBP2', 'SYT11', 'PPP2R5C', 'PPP1R15A', 'RGS1', 'RAB37', 
                    'REEP5', 'IKZF3', 'RNF214', 'IRF1', 'ANTKMT', 'PRDX1', 'IQGAP2', 'MT2A', 
                    'TNFRSF18', 'FAS', 'DUSP2', 'CCDC167', 'HLA-DRA', 'PELI1', 'JUNB', 'LGALS9', 
                    'ZBTB38', 'SPTAN1', 'SMS', 'PARP1', 'MCL1', 'DYNLL1', 'HERC5', 'CDC25B', 
                    'SLC9A3R1', 'MYO1G', 'TPR', 'JUN', 'ID3', 'TPI1', 'RILPL2', 'CCR6', 
                    'YWHAH', 'PTGER2', 'HNRNPLL', 'PREX1', 'PSMB9', 'MYH9', 'CORO1B', 'SLAMF1', 
                    'SIT1', 'NPDC1', 'PHACTR2', 'ST8SIA6', 'ATP2B4', 'IL18R1', 'TRIM22', 'HLA-DQB1', 
                    'F5', 'TBC1D4', 'MAF', 'ATP2B1', 'C4orf48', 'GALM', 'C12orf75', 'CPA5', 
                    'PPP1CA', 'S1PR4', 'PDE4DIP', 'GATA3', 'GLIPR2', 'CHST7', 'CXCR4', 'H1-4', 
                    'GADD45B', 'RESF1', 'IER2', 'ISG15', 'CDC42EP3', 'PCBD1', 'LYST', 'TPM4', 
                    'TAB2', 'NINJ2', 'ALOX5AP', 'CCR4', 'FCER1G', 'MAP3K1', 'CXCR3', 'CD59', 
                    'SLFN5', 'CCNG2', 'ITGA4', 'SESN1', 'SPATS2L', 'HPGD', 'EFHD2', 'LIMA1', 
                    'BCL2', 'RABGAP1L', 'TOX', 'SAMD9', 'TXN', 'IFI16', 'IDS', 'TRIB2', 'CDHR3', 
                    'PALM2AKAP2', 'ICA1', 'LPAR6', 'KLRB1', 'SMC6', 'ITM2C', 'CEACAM4', 'PRF1', 
                    'CD63', 'AHR', 'IQGAP1', 'GADD45A', 'ADAM8', 'GLIPR1', 'VAV3', 'EPSTI1', 
                    'GPRIN3', 'POU2F2', 'SH2D2A', 'DENND10', 'MAP4', 'CCR10', 'PTPN18', 'IER5', 
                    'GPR183', 'LAIR2', 'CTSC', 'TTN', 'NR4A2', 'SESN3', 'CPNE2', 'MX1')


In [None]:
# Ranked gene list for the "CD8 T cells" vs "CD8 Unconventional T cells"
# contrast: a named numeric vector of log2 fold changes (names = gene symbols).
fc.df  <- fc  %>%   dplyr::select(gene, avg_log2FC)
ranks <- deframe(fc.df)

In [None]:
library(fgsea)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)

# The fold changes here come from the level-1 contrast (ident.1 = "CD8 T cells",
# ident.2 = "CD8 Unconventional T cells"), not from the gd-vs-naive contrast, so
# the title names this contrast instead of the copied "in Tgd".
plotEnrichment(treg_gene_sig,
               ranks) + labs(title="treg_gene_sig: CD8 T cells vs Unconventional T cells") 


In [None]:
# Enrichment statistics for the single Treg gene set.
fg  <- fgsea(pathways = list("treg_gene_sig" = treg_gene_sig),
             stats = ranks)

In [None]:
# Genes driving the enrichment signal.
fg$leadingEdge

# Tr3-56 published data

We download the published bulk RNAseq dataset characterizing TR3-56 cells from [Terrazzano et al., 2020](https://www.nature.com/articles/s42255-020-0173-1)

In [None]:
# Download the GEO series only when no local copy exists. The next cell reads
# the cached object from disk, so an unconditional download would be discarded
# (and would require network access on every run). A fresh download is saved
# to the cache path so that the following readRDS() call succeeds.
geo_cache <- "../data//geo//tr356.rds"
if (!file.exists(geo_cache)) {
  gset <- GEOquery::getGEO("GSE106082")
  dir.create(dirname(geo_cache), recursive = TRUE, showWarnings = FALSE)
  saveRDS(gset, geo_cache)
}

In [None]:
# Load the locally cached GEO object.
gset <- readRDS(file = "../data//geo//tr356.rds")

In [None]:
# Sample sheet for the reference: GEO accession and the value of the
# `characteristics_ch1.1` field (used below as the cell-type label).
metadata <- data.frame(
  geo_id = gset[["GSE106082_series_matrix.txt.gz"]]$geo_accession,
  cell_type = gset[["GSE106082_series_matrix.txt.gz"]]$characteristics_ch1.1
)

We will load the matrix and use it together with the metadata to identify potential Tr3-56 cells by SingleR. 

In [None]:
# Expression table of the published reference samples.
mtx_tr356 <- read_csv(
  file = "../../240218_VN_Diabetes_V05/data/published_data/Terrazzano_2020/tr356_df_sum.csv"
)

In [None]:
# Drop the column that read_csv named "...1".
mtx_tr356[["...1"]] <- NULL

In [None]:
# Use the gene symbols as row names.
mtx_tr356 <- column_to_rownames(mtx_tr356, var = "SYMBOL")

In [None]:
mtx_tr356

In [None]:
plan("multisession")

## Annotation of Tr3-56 cells

In [None]:
# First SingleR pass: annotate every cell (raw RNA counts) against all
# reference samples and labels, using Wilcoxon-based marker detection and no
# fine-tuning.
# NOTE(review): at this point `mtx_tr356` is still the data frame read from the
# CSV (it is converted to numeric and to a matrix only in later cells), and
# `pred` is overwritten by the second SingleR call below -- confirm whether
# this cell is still needed.
pred <- SingleR(test = cd8_l1_full_filt@assays$RNA@counts, 
                ref=mtx_tr356, labels=metadata$cell_type, de.method="wilcox",fine.tune = F,num.threads = 4)

In [None]:
mtx_tr356

In [None]:
# Coerce every column to numeric. The in-place assignment keeps the object a
# data frame and keeps its row names (the gene symbols), and replaces the
# superseded dplyr::mutate_all().
mtx_tr356[] <- lapply(mtx_tr356, as.numeric)

In [None]:
mtx_tr356

In [None]:
# Labels of the reference samples.
metadata$cell_type

Create a reference without the CD3+CD56- population, as it is too broad for our purpose and overlaps with the CD8+ population. 

In [None]:
# Reference samples to keep (positions 1-6 and 10-12). The positions are
# defined once so that the matrix columns and the labels cannot get out of sync.
# NOTE(review): this assumes the columns of `mtx_tr356` are in the same order
# as the rows of `metadata`, and that positions 7-9 are the excluded
# population -- confirm.
ref_keep_idx <- c(1:6, 10:12)
ref_tr3_56 <- list(matrix = mtx_tr356[, ref_keep_idx],
                   labels = metadata$cell_type[ref_keep_idx])
# Out-of-range positions would silently yield NA labels; fail early instead.
stopifnot(!anyNA(ref_tr3_56$labels))


In [None]:
mtx_tr356

In [None]:
ref_tr3_56$labels

In [None]:
# SingleR needs a matrix, not a data frame.
ref_tr3_56$matrix  <- as.matrix(ref_tr3_56$matrix)

In [None]:
# Annotate every cell (raw RNA counts) against the reduced reference, with
# fine-tuning enabled.
pred <- SingleR(test = cd8_l1_full_filt@assays$RNA@counts, 
                ref=ref_tr3_56$matrix, labels=ref_tr3_56$labels, 
                fine.tune = TRUE,num.threads = 4
       )

Add labels to the Seurat object. 

In [None]:
# Store the SingleR label of each cell in the metadata.
cd8_l1_full_filt$singler  <- pred$labels

In [None]:
options(repr.plot.width = 8, repr.plot.height = 4.5)
DimPlot(cd8_l1_full_filt, group.by = "annotations_l2")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4.5)
# Cells coloured by SingleR label, drawn in shuffled order.
# NOTE(review): the three colours are assigned by position, which assumes
# exactly three labels -- confirm which label ends up red.
DimPlot(cd8_l1_full_filt, group.by = "singler", shuffle = TRUE, cols = c("darkseagreen1","skyblue1","red"))

In [None]:
# Per-cell score for every reference label.
pred$scores

## Quantification of Tr3-56 score in cell types

In [None]:
# One row per cell: level-2 annotation and SingleR label.
test  <- data.frame(annotation = cd8_l1_full_filt$annotations_l2,
                    pred = cd8_l1_full_filt$singler)

In [None]:
# Number of cells per (annotation, label) pair and the fraction each label
# makes up within its annotation. The grouping is handled explicitly: keep the
# `annotation` grouping for the within-annotation fraction, then drop it so the
# result is not left grouped.
df2 <- test %>%
  group_by(annotation, pred) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

In [None]:
df2

In [None]:
options(repr.plot.width = 16, repr.plot.height = 4.5)
# Stacked bar per annotation showing the share of each SingleR label.
ggplot(test) +
  aes(x = annotation, fill = factor(pred)) +
  geom_bar(position = "fill") + 
theme_classic() + 
ggtheme() +
coord_flip() + 
scale_fill_manual(values = c("#d6ebd2ff", "lightskyblue1", "red2"))
#scale_fill_manual(values = c("grey","#74bc68ff", "dodgerblue3", "red2"))

In [None]:
pred$scores

In [None]:
# Per-cell SingleR score for the TR3-56 reference label. The column is selected
# by its label name (the same label that is matched against the SingleR labels
# further down in this notebook) instead of by position, so the result does not
# depend on the column order of the score matrix.
cd8_l1_full_filt$score_tr356  <- pred$scores[, "cell subset: TR3-56"]

We can see that the highest Tr3-56 score is found in Tgd cells and MAIT cells. 

In [None]:
options(repr.plot.width = 14, repr.plot.height = 6)
VlnPlot(
  object = cd8_l1_full_filt,
  features = "score_tr356",
  group.by = "annotations_l2",
  pt.size = 0
)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 6)
# TR3-56 score per level-2 annotation. The "---" separator becomes a line break
# and the level-1 prefixes are removed to shorten the axis labels; groups are
# ordered with fct_reorder() and the red bar marks the group mean.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Annotation = gsub(pattern = "---", replacement = "\n",
                    x = cd8_l1_full_filt$annotations_l2)
) %>%
  mutate(
    Annotation = gsub(pattern = "CD8 T cells", replacement = "", x = Annotation),
    Annotation = gsub(pattern = "CD8 Unconventional T cells", replacement = "", x = Annotation),
    Annotation = gsub(pattern = "CD8 NK cells", replacement = "", x = Annotation)
  ) %>%
  ggplot(aes(x = fct_reorder(Annotation, Score), y = Score)) +
  geom_violin() +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("")

Group nonNaive cells together. 

In [None]:
# TR3-56 score per cell group after pooling Tem, Tcm, Temra and Proliferating
# cells into a single "NonNaive" group; the red bar marks the group mean.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Annotation = gsub(pattern = "---", replacement = "",
                    x = cd8_l1_full_filt$annotations_l2)
) %>%
  mutate(
    Annotation = gsub(pattern = "CD8 T cells", replacement = "", x = Annotation),
    Annotation = gsub(pattern = "CD8 Unconventional T cells", replacement = "", x = Annotation),
    Annotation = gsub(pattern = "CD8 NK cells", replacement = "", x = Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  ) %>%
  ggplot(aes(x = fct_reorder(annot2, Score), y = Score)) +
  geom_violin() +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("")
# ggsave("../figures/cd356_score.svg", width = 4, height = 3.5)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4.5)

# Score on the embedding, with the colour scale clipped to [0.2, 0.32].
FeaturePlot(
  object = cd8_l1_full_filt,
  features = "score_tr356",
  min.cutoff = 0.2,
  max.cutoff = 0.32
)

In [None]:
FeaturePlot(object = cd8_l1_full_filt, features = "NCAM1", min.cutoff = 0.2)

## Phenotype and DEG of Tr3-56 cells

In [None]:
# One row per cell: pooled cell group (`annot2`), level-2 annotation, SingleR
# label and TR3-56 score.
# The `annot2` metadata column is only created in a later cell of this
# notebook, so it is derived here from `annotations_l2` in the same way
# (prefixes stripped, Tem/Tcm/Temra/Proliferating pooled as "NonNaive"). This
# lets the cell run when the notebook is executed top to bottom.
test  <- data.frame(annotation = cd8_l1_full_filt$annotations_l2,
                    pred = cd8_l1_full_filt$singler, 
                    score = cd8_l1_full_filt$score_tr356)  %>% 
  mutate(Annotation = gsub(annotation, pattern = "---", replacement = ""))  %>% 
  mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
  mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
  mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
  mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                            TRUE ~ Annotation))  %>% 
  dplyr::select(annot2, annotation, pred, score)

In [None]:
# Number of cells per SingleR label.
test$pred  %>% table

In [None]:
options(repr.plot.width = 7, repr.plot.height = 10)
# Only cells labelled as TR3-56 by SingleR: score per level-2 annotation, with
# individual cells as jittered points and the group mean as a red bar.
test %>%
  dplyr::filter(pred == "cell subset: TR3-56") %>%
  ggplot(aes(x = fct_reorder(annotation, score), y = score)) +
  geom_violin() +
  geom_jitter(alpha = 0.2) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("") +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.width = 4, repr.plot.height = 6)
# Same plot, grouped by the pooled `annot2` cell groups.
test %>%
  dplyr::filter(pred == "cell subset: TR3-56") %>%
  ggplot(aes(x = fct_reorder(annot2, score), y = score)) +
  geom_violin() +
  geom_jitter(alpha = 0.2) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("") +
  theme(axis.text.x = element_text(angle = 90))

## Gene expression in Unconventional subclusters

Create annotations of groups - merge nonNaive cell subclusters together. 

In [None]:
# Add two metadata columns:
#   Annotation - level-2 annotation with the "---" separator and the level-1
#                prefixes removed;
#   annot2     - same, with Tem/Tcm/Temra/Proliferating pooled as "NonNaive".
cd8_l1_full_filt@meta.data <- cd8_l1_full_filt@meta.data %>%
  mutate(
    Annotation = gsub(pattern = "---", replacement = "",
                      x = cd8_l1_full_filt$annotations_l2),
    Annotation = gsub(pattern = "CD8 T cells", replacement = "", x = Annotation),
    Annotation = gsub(pattern = "CD8 Unconventional T cells", replacement = "", x = Annotation),
    Annotation = gsub(pattern = "CD8 NK cells", replacement = "", x = Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  )

In [None]:
# Combined "<annot2> <Sample_ID>" key.
cd8_l1_full_filt$annotations_l2_sample <- paste(
  cd8_l1_full_filt$annot2,
  cd8_l1_full_filt$Sample_ID,
  sep = " "
)

In [None]:
# Number of cells per cell group and sample.
cd8_l1_full_filt$annotations_l2_sample   %>% table

Define a helper function that quantifies, for each sample, the percentage of cells with non-zero expression of a selected gene. 

In [None]:
# Plot, for one gene, the per-sample fraction of cells with a non-zero RNA
# count in each cell group.
#
# Arguments:
#   seurat_object  Seurat object; the raw counts of its RNA assay are used.
#   gene           Gene symbol (a row name of the RNA assay).
#   group.by       Name of the metadata column defining the groups (x axis).
#   sample.col     Name of the metadata column identifying the sample; one
#                  point is drawn per sample and group.
#
# Returns a ggplot object (violin per group plus one jittered point per sample).
# Stops with an informative error if the gene or a metadata column is missing.
#
# NOTE(review): the y axis is labelled "Pct expressing cells" but the plotted
# value is a fraction between 0 and 1 -- confirm which one is intended.
pct_expressing_boxplot <- function(seurat_object, gene, group.by = "annotations_l2", sample.col = "sample") {
  meta <- seurat_object@meta.data

  # Validate inputs up front; otherwise a typo surfaces as an obscure
  # data.frame() length error further down.
  missing_cols <- setdiff(c(group.by, sample.col), colnames(meta))
  if (length(missing_cols) > 0) {
    stop("Metadata column(s) not found: ", paste(missing_cols, collapse = ", "),
         call. = FALSE)
  }
  rn <- match(gene, rownames(seurat_object@assays$RNA))
  if (is.na(rn)) {
    stop("Gene '", gene, "' not found in the RNA assay.", call. = FALSE)
  }

  # Local text-size theme so the function does not depend on a global helper.
  ggtheme <- function() {
    theme(
      axis.text = element_text(size = 20),
      axis.title = element_text(size = 20),
      text = element_text(size = 20, colour = "black"),
      legend.text = element_text(size = 20),
      legend.key.size = unit(10, units = "points")
    )
  }

  df <- data.frame(
    grouping_var = meta[[group.by]],
    value = seurat_object@assays$RNA@counts[rn, ],
    sample = meta[[sample.col]]
  ) %>%
    mutate(expressing = if_else(value > 0, 1, 0)) %>%
    dplyr::select(-value) %>%
    group_by(sample, grouping_var) %>%
    summarise(mean_expression = mean(expressing), .groups = "drop") %>%
    # Widen and lengthen again so that every sample/group combination is
    # present; combinations without any cells are filled with 0.
    pivot_wider(names_from = sample, values_from = mean_expression, values_fill = 0) %>%
    pivot_longer(!grouping_var, names_to = "sample", values_to = "expressing")

  ggplot(data = df, aes(x = grouping_var, y = expressing)) +
    geom_violin(aes(fill = grouping_var), alpha = 0.3, scale = "width") +
    # dotsize = 0: this layer draws no visible dots; kept from the original.
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_jitter(width = 0.1, height = 0.0, size = 2, aes(color = grouping_var)) +
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
    ggtheme() +
    ggtitle(gene) +
    ylab("Pct expressing cells") +
    xlab("") +
    NoLegend()
}


In [None]:
options(repr.plot.width = 4, repr.plot.height = 6)

# Fraction of NCAM1-expressing cells per pooled cell group and sample.
pct_expressing_boxplot(
  seurat_object = cd8_l1_full_filt,
  gene = "NCAM1",
  group.by = "annot2",
  sample.col = "Sample_ID"
)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 6)
# Same for FOXP3.
pct_expressing_boxplot(
  seurat_object = cd8_l1_full_filt,
  gene = "FOXP3",
  group.by = "annot2",
  sample.col = "Sample_ID"
)

In [None]:
cd8_l1_full_filt$Experiment_ID

In [None]:
# Restrict to cells from experiments Exp16, Exp18, Exp19 and Exp20.
cd8_l1_full_filt_sub <- subset(
  x = cd8_l1_full_filt,
  subset = Experiment_ID %in% c("Exp16","Exp18","Exp19","Exp20")
)

In [None]:
options(repr.plot.width=16, repr.plot.height=6)
# One panel per gene, grouped by the pooled `annot2` cell groups.
# `cd8_l1_full_filt_sub` already contains only Exp16/18/19/20, so it is passed
# directly instead of being subset again for every panel (the repeated subset
# copied the object four times without changing its content).
Reduce(`+`, lapply(c("TGFB1", "ITGAV", "ITGB8", "ENTPD1"), function(g) {
  pct_expressing_boxplot(seurat_object = cd8_l1_full_filt_sub,
                         group.by = "annot2", gene = g,
                         sample.col = "Sample_ID")
})) +
  plot_layout(ncol = 4)

In [None]:
# One panel per gene, grouped by the pooled `annot2` cell groups.
# `cd8_l1_full_filt_sub` already contains only Exp16/18/19/20, so it is passed
# directly instead of being subset again for every panel.
Reduce(`+`, lapply(c("CTLA4", "TIGIT", "IL10RA", "IL10"), function(g) {
  pct_expressing_boxplot(seurat_object = cd8_l1_full_filt_sub,
                         group.by = "annot2", gene = g,
                         sample.col = "Sample_ID")
})) +
  plot_layout(ncol = 4)

In [None]:
options(repr.plot.width=16, repr.plot.height=12)
# One panel per gene, grouped by the level-2 annotation, one point per sample.
# Every panel now uses `Sample_ID` as the sample column: the ITGAV panel
# previously passed "annotations_l2" as `sample.col`, which collapsed each
# group to a single point instead of one point per sample.
# `cd8_l1_full_filt_sub` already contains only Exp16/18/19/20, so it is passed
# directly instead of being subset again for every panel.
Reduce(`+`, lapply(c("TGFB1", "ITGAV", "ITGB8", "ENTPD1"), function(g) {
  pct_expressing_boxplot(seurat_object = cd8_l1_full_filt_sub,
                         group.by = "annotations_l2", gene = g,
                         sample.col = "Sample_ID")
})) +
  plot_layout(ncol = 4)

In [None]:
options(repr.plot.width=16, repr.plot.height=12)
# One panel per gene, grouped by the level-2 annotation, one point per sample.
# Every panel now uses `Sample_ID` as the sample column: the TIGIT panel
# previously passed "annotations_l2" as `sample.col`, which collapsed each
# group to a single point instead of one point per sample.
# `cd8_l1_full_filt_sub` already contains only Exp16/18/19/20, so it is passed
# directly instead of being subset again for every panel.
Reduce(`+`, lapply(c("CTLA4", "TIGIT", "IL10RA", "IL10"), function(g) {
  pct_expressing_boxplot(seurat_object = cd8_l1_full_filt_sub,
                         group.by = "annotations_l2", gene = g,
                         sample.col = "Sample_ID")
})) +
  plot_layout(ncol = 4)

In [None]:
# Combined "<annot2>___<Sample_ID>" key used to average expression per cell
# group and sample.
cd8_l1_full_filt_sub$annot2_sample  <- paste(cd8_l1_full_filt_sub$annot2, cd8_l1_full_filt_sub$Sample_ID, sep = "___") 

In [None]:
# Raise the size limit for globals passed to future workers (value in bytes).
options(future.globals.maxSize = 25000 * 1024^2)

In [None]:
# Average RNA expression of the selected genes for every annot2/sample key,
# returned as a Seurat object.
avgexp  <- AverageExpression(cd8_l1_full_filt_sub, features = c("CTLA4","TIGIT","IL10RA","IL10",
                                                               "TGFB1","ITGAV","ITGB8","ENTPD1"), 
                             group.by = c("annot2_sample"), return.seurat = TRUE, assay = "RNA")

In [None]:
# Split the combined key back into cell group and sample.
# NOTE(review): the key was built with "___" but is split on "---"; this
# presumably relies on Seurat replacing underscores with dashes in group names
# when averaging -- confirm that `annot2` and `sample` are filled as expected.
avgexp@meta.data  <- avgexp@meta.data  %>% separate(annot2_sample, into = c("annot2","sample"), remove = F, sep = "---")

In [None]:
# Averaged expression per cell group (one point per sample).
VlnPlot(avgexp, features = c("CTLA4","TIGIT","IL10RA","IL10"), group.by = "annot2", ncol = 4)

In [None]:
VlnPlot(avgexp, features = c("TGFB1","ITGAV","ITGB8","ENTPD1"), group.by = "annot2", ncol = 4)

# DE genes heatmap avg.

In [None]:
# Free the memory held by the experiment-restricted copy.
rm(cd8_l1_full_filt_sub)

In [None]:
# Average RNA expression per "Patient_Time" group (experiments Exp16/18/19/20
# only), returned as a list with one matrix per assay. This list form is what
# plot_gene() / plot_gene2() further down expect in `avgexp`.
avgexp <- AverageExpression(subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16","Exp18","Exp19","Exp20")), 
                            return.seurat = FALSE, group.by = "Patient_Time", 
                            assay = "RNA")

In [None]:
# Average GNLY expression per group.
avgexp$RNA[which(rownames(avgexp$RNA)=="GNLY"),]  %>% as.data.frame()

## Heatmap

### All cells

In [None]:
# Space-separated "<Patient_ID> <Time> <Disease>" key per cell.
cd8_l1_full_filt$Patient_Time_Disease  <- paste(cd8_l1_full_filt$Patient_ID, cd8_l1_full_filt$Time, 
                                               cd8_l1_full_filt$Disease)

In [None]:
# Space-separated "<Disease> <Time>" key per cell.
cd8_l1_full_filt$Disease_time  <- paste(
                                               cd8_l1_full_filt$Disease,
cd8_l1_full_filt$Time)

In [None]:
# Average RNA expression over all cells of each Disease/Time group
# (experiments Exp16/18/19/20 only), returned as a list of matrices.
# NOTE: this overwrites the per-"Patient_Time" `avgexp` computed earlier.
avgexp <- AverageExpression(subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16","Exp18","Exp19","Exp20")), 
                            return.seurat = FALSE, group.by = "Disease_time", 
                            assay = "RNA")

In [None]:
# Genes shown in the heatmap.
# "NKFBIA" was a typo for NFKBIA (spelled correctly in `genes2` further down);
# because the genes are matched with %in%, the misspelled symbol was silently
# dropped from the heatmap.
genes  <- c("PCBP2","PCBP1",
            "CX3CR1","TNF","GZMB","GZMA","PRF1","NKG7","GNLY","CCL5","CST7",
            "BTG1","SELL","IL7R","CCR7","BTG2","SLAMF6","LEF1",
             "TNFAIP3","TSC22D3","NFKBIA","DUSP1")

In [None]:
# Keep only the selected genes.
# NOTE: from here on `avgexp` is a plain matrix, no longer the list returned by
# AverageExpression().
avgexp  <- avgexp$RNA[which(rownames(avgexp$RNA) %in% genes),]

In [None]:
library(pheatmap)

options(repr.plot.width = 4, repr.plot.height = 9)
# Row-scaled heatmap; genes are clustered, the Disease/Time columns keep their
# order.
pheatmap(avgexp, main = "", scale = "row", cluster_cols = FALSE, cluster_rows = TRUE,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 9)

In [None]:
# Create the output directory if needed: `recursive = TRUE` also creates a
# missing "../tables" parent, and `showWarnings = FALSE` avoids the warning
# emitted when the directory already exists (e.g. when the cell is re-run).
dir.create("../tables/avg_heatmap/", recursive = TRUE, showWarnings = FALSE)
write.csv(avgexp, "../tables/avg_heatmap/cd8_avg.csv")

### Average of patient

In [None]:
# Genes shown in the per-patient heatmap.
# Two misspelled symbols are corrected: "NKFBIA" -> NFKBIA and
# "PI3KR1" -> PIK3R1. Because the genes are matched with %in%, the misspelled
# symbols were silently dropped from the heatmap.
genes  <- c("PCBP2","PCBP1",
            "CX3CR1","TNF","GZMB","GZMA","PRF1","NKG7","GNLY","CCL5","CST7",
            "BTG1","SELL","IL7R","CCR7","BTG2","SLAMF6","LEF1",
             "TSC22D3","NFKBIA","DUSP1","CXCR4","PIK3R1","CCL4", "IFI44L","EPSTI1","MX1", "FOXO1", "ISG15")

In [None]:
# Step 1: average RNA expression per patient/time/disease key (experiments
# Exp16/18/19/20 only), returned as a Seurat object with one "cell" per key.
# NOTE: this overwrites `avgexp` with a Seurat object.
avgexp <- AverageExpression(subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16","Exp18","Exp19","Exp20")), 
                            return.seurat = TRUE, group.by = "Patient_Time_Disease", 
                            assay = "RNA")

In [None]:
# Recover patient, time and disease from the space-separated key.
# NOTE(review): assumes none of the three parts contains a space itself and
# that the column names still carry the original key -- confirm.
avgexp$Patient_Time_Disease  <- colnames(avgexp)
avgexp@meta.data  <- avgexp@meta.data  %>% 
separate(Patient_Time_Disease, into = c("Patient", "Time", "Disease"), sep = " ", remove = FALSE)

In [None]:
avgexp$Disease_Time  <- paste(avgexp$Disease, avgexp$Time)

In [None]:
# Step 2: average the per-patient profiles within each Disease/Time group, so
# every patient contributes equally regardless of its cell number.
avgexp2 <- AverageExpression(avgexp, 
                             return.seurat = FALSE, group.by = "Disease_Time")

In [None]:
# Keep only the selected genes.
avgexp3  <- avgexp2$RNA[which(rownames(avgexp2$RNA) %in% genes),]

In [None]:
avgexp3

In [None]:
options(repr.plot.width = 4, repr.plot.height = 9)
# Row-scaled heatmap of the per-patient averages; genes are clustered, the
# Disease/Time columns keep their order.
pheatmap(avgexp3, main = "", scale = "row", cluster_cols = FALSE, cluster_rows = TRUE,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 9)

In [None]:
# Make sure the output directory exists before writing, so this cell does not
# depend on an earlier cell having created it.
dir.create("../tables/avg_heatmap/", recursive = TRUE, showWarnings = FALSE)
write.csv(avgexp3, "../tables/avg_heatmap/cd8_avg_by_patient.csv")

In [None]:
# Genes in the fixed display order used for the final heatmap.
genes2  <- c("PCBP2","PCBP1","CXCR4","FOXO1","IL7R","LEF1","CCR7","SELL",
             "DUSP1","TSC22D3","NFKBIA","TNFAIP3",
             "IFI44L","ISG15","MX1","EPSTI1",
            "PRF1","NKG7","GZMB","CST7","GNLY","GZMA","CX3CR1","CCL5","TNF"
            )

In [None]:
avgexp3  <- avgexp2$RNA[which(rownames(avgexp2$RNA) %in% genes2),]

In [None]:
# Rows are put in the order of `genes2`. intersect() keeps that order and skips
# genes that are absent from the data; match() would instead produce NA row
# indices for them.
pheatmap(avgexp3[intersect(genes2, rownames(avgexp3)),], 
         main = "", scale = "row", cluster_cols = FALSE, cluster_rows = FALSE,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 5, height = 10,
                  fontsize = 9)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 9)
# Same heatmap written to a PDF file. The target directory is created first
# because writing the file fails when it does not exist.
heatmap_file <- "../figures/heatmaps/de_genes_cd8.pdf"
dir.create(dirname(heatmap_file), recursive = TRUE, showWarnings = FALSE)
# intersect() keeps the order of `genes2` and skips genes absent from the data
# (match() would produce NA row indices for them).
pheatmap(avgexp3[intersect(genes2, rownames(avgexp3)),], 
         main = "", scale = "row", cluster_cols = FALSE, cluster_rows = FALSE,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 5, height = 10,
                  fontsize = 9,
        filename = heatmap_file)

In [None]:
# Plot the averaged expression of one gene per condition, as two side-by-side
# panels.
#
# Arguments:
#   gene  Gene symbol (a row name of `avg$RNA`).
#   avg   List with an `RNA` matrix of averaged expression (genes x groups), as
#         returned by AverageExpression(..., return.seurat = FALSE). Column
#         names must have the form "<Patient> <Time>". Defaults to the global
#         `avgexp`, which keeps existing calls working.
#
# Returns the two panels combined with `+`:
#   left  - all patients, comparison between conditions 1 and 2;
#   right - patient 116 excluded, paired comparison between conditions 2 and 3.
#
# In this notebook the global `avgexp` is reassigned several times (to a matrix
# and to a Seurat object), so the input is validated and an informative error
# is raised instead of an obscure failure on `avgexp$RNA`.
#
# NOTE(review): patients whose ID starts with "1" are treated as "Dia", all
# others as "Ctrl"; the paired test assumes the samples of conditions 2 and 3
# are in matching patient order -- confirm both.
plot_gene <- function(gene, avg = avgexp) {
  if (!is.list(avg) || is.null(avg$RNA)) {
    stop("`avg` must be the list returned by AverageExpression(..., ",
         "return.seurat = FALSE, group.by = \"Patient_Time\").", call. = FALSE)
  }
  idx <- match(gene, rownames(avg$RNA))
  if (is.na(idx)) {
    stop("Gene '", gene, "' not found in the averaged expression matrix.",
         call. = FALSE)
  }

  # One row per "<Patient> <Time>" group with the averaged expression in `gene2`.
  df <- as.data.frame(avg$RNA[idx, ]) %>% rownames_to_column("Patient_Time")
  colnames(df)[2] <- "gene2"
  df <- df %>%
    separate(Patient_Time, into = c("Patient", "Time"), sep = " ", remove = FALSE) %>%
    mutate(group = ifelse(substr(Patient, 1, 1) == "1", "Dia", "Ctrl")) %>%
    mutate(Condition = paste(group, Time))

  # Build one panel; only the data and the statistical comparison differ.
  build_panel <- function(data, comparison, paired) {
    ggplot(data, aes(x = Condition, y = gene2)) +
      geom_violin(alpha = 0.3, aes(fill = Condition), scale = "width") +
      stat_summary(fun = "median",
                   geom = "crossbar",
                   width = 0.75,
                   color = "grey30") +
      geom_beeswarm(size = 3, aes(fill = Condition), cex = 3,
                    shape = 21, color = "black", corral = "random") +
      stat_compare_means(label = "p.format", comparisons = list(comparison),
                         paired = paired) +
      scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
      scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
      ylab("") +
      xlab("") +
      ylim(0, NA) +
      theme_classic() +
      theme(strip.background = element_blank(), panel.grid = element_blank()) +
      theme(axis.text = element_text(color = "black"),
            axis.text.x = element_text(angle = 90),
            axis.line = element_line(color = "black", size = 0.5),
            axis.ticks.x = element_blank()) +
      ggtitle(gene) +
      ggtheme()
  }

  p_all <- build_panel(df, c(1, 2), paired = FALSE)
  p_paired <- build_panel(dplyr::filter(df, Patient != 116), c(2, 3), paired = TRUE)
  p_all + p_paired
}

In [None]:
# Compact single-panel version of plot_gene(): averaged expression of one gene
# per condition, without x-axis labels and without a legend.
#
# Arguments:
#   gene  Gene symbol (a row name of `avg$RNA`).
#   avg   List with an `RNA` matrix of averaged expression (genes x groups), as
#         returned by AverageExpression(..., return.seurat = FALSE). Column
#         names must have the form "<Patient> <Time>". Defaults to the global
#         `avgexp`, which keeps existing calls working.
#
# Returns a ggplot object. As a side effect the notebook plot size is set to
# 2.5 x 4; this is intentionally not restored, because the plot is rendered
# only after the function has returned.
#
# In this notebook the global `avgexp` is reassigned several times (to a matrix
# and to a Seurat object), so the input is validated and an informative error
# is raised instead of an obscure failure on `avgexp$RNA`.
#
# NOTE(review): patients whose ID starts with "1" are treated as "Dia", all
# others as "Ctrl" -- confirm.
plot_gene2 <- function(gene, avg = avgexp) {
  if (!is.list(avg) || is.null(avg$RNA)) {
    stop("`avg` must be the list returned by AverageExpression(..., ",
         "return.seurat = FALSE, group.by = \"Patient_Time\").", call. = FALSE)
  }
  idx <- match(gene, rownames(avg$RNA))
  if (is.na(idx)) {
    stop("Gene '", gene, "' not found in the averaged expression matrix.",
         call. = FALSE)
  }

  options(repr.plot.width = 2.5, repr.plot.height = 4)

  # One row per "<Patient> <Time>" group with the averaged expression in `gene2`.
  df <- as.data.frame(avg$RNA[idx, ]) %>% rownames_to_column("Patient_Time")
  colnames(df)[2] <- "gene2"
  df <- df %>%
    separate(Patient_Time, into = c("Patient", "Time"), sep = " ", remove = FALSE) %>%
    mutate(group = ifelse(substr(Patient, 1, 1) == "1", "Dia", "Ctrl")) %>%
    mutate(Condition = paste(group, Time))

  ggplot(df, aes(x = Condition, y = gene2)) +
    geom_violin(alpha = 0.3, aes(fill = Condition), scale = "width") +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.75,
                 color = "grey30") +
    geom_beeswarm(size = 3, aes(fill = Condition), cex = 3,
                  shape = 21, color = "black", corral = "random") +
    stat_compare_means(label = "p.format", comparisons = list(c(1, 2))) +
    scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
    ylab("") +
    xlab("") +
    ylim(0, NA) +
    theme_classic() +
    theme(strip.background = element_blank(), panel.grid = element_blank()) +
    theme(axis.text = element_text(color = "black"),
          axis.text.x = element_blank(),
          axis.line = element_line(color = "black", size = 0.5),
          axis.ticks.x = element_blank()) +
    ggtitle(gene) +
    ggtheme() +
    NoLegend()
}

In [None]:
plot_gene("IFI44L")

In [None]:
plot_gene2("NKG7")

In [None]:
plot_gene2("BTN3A2")

In [None]:
plot_gene("ASCL2")

In [None]:
plot_gene("DUSP1")

In [None]:
plot_gene("TSC22D3")

In [None]:
plot_gene("RBM3")

In [None]:
plot_gene("GADD45B")

In [None]:
DimPlot(cd8_l2_nk)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 4)
FeaturePlot(cd8_l2_nk, features = "NCAM1", min.cutoff = 0)

In [None]:
FeaturePlot(cd8_l2_nk, features = "HLA-DRB1", min.cutoff = 0)

In [None]:
FeaturePlot(cd8_l2_nk, features = "CCR7", min.cutoff = 0)

## gd CD8

In [None]:
gd  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L2/cd8_l2_unc.rds")

In [None]:
gd

In [None]:
options(repr.plot.width = 8, repr.plot.height = 5)
DimPlot(gd, group.by = "annotations_manual", cols = gd@misc$cols_annotations)

In [None]:
df4  <- create_df4(gd)

In [None]:
df4

In [None]:
# Frequency (in %) of each manually annotated subset per Condition, restricted
# to experiments Exp16/18/19/20: violin + median crossbar + beeswarm points,
# one facet per subset, with a p-value for the first two x-axis groups.
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  ggplot(aes(x = Condition, y = freq * 100)) +
  geom_violin(mapping = aes(fill = Condition), alpha = 0.3) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(
    mapping = aes(fill = Condition),
    shape = 21,
    size = 3,
    cex = 3,
    color = "black",
    method = "center"
  ) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  labs(x = "Condition", y = "Frequency") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(
    label = "p.format",
    comparisons = list(c(1, 2)),
    label.x = 1.5,
    size = 7,
    vjust = -1
  ) +
  ggtheme() +
  # y axis starts at 0; the expand vector adds extra room at the upper end
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(
    strip.background = element_blank(),
    panel.grid = element_blank(),
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data all groups")

# Number of distinct subsets in df4 (not used by the plot above).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
p5

In [None]:
options(repr.plot.width = 15, repr.plot.height = 7)

p5

ggsave("../figures/subset_characterization/tgd_subsets_in_condition.svg",
       width = 30, height = 12, units = "cm")

In [None]:
  # Same frequency plot as before, but with patient "116" removed and a paired
  # test between the 2nd and 3rd x-axis groups.
  p5 <- df4 %>%
    filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
    dplyr::filter(Patient_ID != "116") %>%
    ggplot(aes(x = Condition, y = freq * 100)) +
    geom_violin(mapping = aes(fill = Condition), alpha = 0.3) +
    stat_summary(
      fun = "median",
      geom = "crossbar",
      width = 0.75,
      color = "grey30"
    ) +
    scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
    geom_beeswarm(
      mapping = aes(fill = Condition),
      shape = 21,
      size = 3,
      cex = 3,
      color = "black",
      method = "center"
    ) +
    facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
    labs(x = "Condition", y = "Frequency") +
    theme_classic() +
    ggtheme() +
    ggpubr::stat_compare_means(
      label = "p.format",
      comparisons = list(c(2, 3)),
      paired = TRUE,
      label.x = 1.5,
      size = 7,
      vjust = -1
    ) +
    ggtheme() +
    # y axis starts at 0; the expand vector adds extra room at the upper end
    scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
    theme(
      strip.background = element_blank(),
      panel.grid = element_blank(),
      axis.text = element_text(color = "black"),
      axis.text.x = element_text(angle = 90),
      axis.line = element_line(color = "black", size = 0.5),
      axis.ticks.x = element_blank()
    ) +
    NoLegend() +
    ggtitle("Final data all groups")

# Number of distinct subsets in df4 (not used by the plot above).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
p5

In [None]:
scCustomize::Plot_Density_Custom(seurat_object = gd, features = "CD8A", custom_palette = c("lightblue","khaki1","red")) 

In [None]:
options(repr.plot.width = 8, repr.plot.height = 4)
DimPlot(gd, group.by = "annotations_manual", 
        cols = rep("grey88",times = 6))

In [None]:
cd8_l1_full_filt  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# Lookup table: barcode of every cell in `gd` -> its manual gd annotation.
# data.frame() names the unnamed second column `gd.annotations_manual`.
# This table has to exist before it can be joined onto the full CD8 metadata.
md_gd  <- data.frame(barcode = colnames(gd), 
                     gd$annotations_manual)

In [None]:
# Attach the gd annotation to the full CD8 metadata by cell barcode; cells that
# are not part of `gd` get NA. The join key is spelled out so that no other
# shared column can silently become part of the key.
# NOTE(review): assumes the metadata already carries a `barcode` column — confirm.
# The cell-barcode rownames of the metadata are re-assigned in a later cell.
cd8_l1_full_filt@meta.data  <- left_join(cd8_l1_full_filt@meta.data, md_gd, by = "barcode")

In [None]:
table(md_gd$barcode %in% colnames(cd8_l1_full_filt))

In [None]:
cd8_l1_full_filt$gd.annotations_manual  %>% table

In [None]:
rownames(cd8_l1_full_filt@meta.data)  <- colnames(cd8_l1_full_filt)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 7.5)
DimPlot(cd8_l1_full_filt, group.by = "gd.annotations_manual", cols = gd@misc$cols_annotations)

In [None]:
FeaturePlot(gd, features = c("ISG15", "IRF7","MX1","MKI67"), ncol = 4)

In [None]:
DimPlot(gd, group.by = "annotations_manual", split.by = "Condition")

In [None]:
options(repr.plot.width = 22, repr.plot.height = 22)

DimPlot(gd, group.by = "annotations_manual", split.by = "Patient_ID", ncol = 5)

In [None]:
# Cells from the four final experiments, with their UMAP coordinates collected
# next to Sample_ID and Condition.
gd_subset <- subset(gd, Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20"))

density <- gd_subset@meta.data %>% dplyr::select(Sample_ID, Condition)

density$x_umap <- gd_subset@reductions$umap@cell.embeddings[, 1]
density$y_umap <- gd_subset@reductions$umap@cell.embeddings[, 2]


library(ggplot2)
library(dplyr)
library(viridis)

# 40-step fill palette used for the density bins (white -> grey -> blue -> red).
colfunc <- colorRampPalette(c("white", "grey85","grey75","dodgerblue","green","yellow","red"))


options(repr.plot.width = 22, repr.plot.height = 5)

# Filled 2D density of the UMAP coordinates for the cells of one Condition.
# The Condition label doubles as the panel title.
umap_density_panel <- function(condition_label) {
  density %>%
    dplyr::filter(Condition == condition_label) %>%
    ggplot(aes(x = x_umap, y = y_umap)) +
    geom_point(shape = "") +
    geom_density_2d_filled(bins = 40) +
    theme_classic() +
    ggtheme() +
    xlim(-10, 7) +
    ylim(-7, 5) +
    scale_fill_manual(values = colfunc(40)) +
    ggtitle(condition_label)
}

# Only the last panel keeps its legend.
p1 <- umap_density_panel("Dia T0") + NoLegend()
p2 <- umap_density_panel("Ctrl T0") + NoLegend()
p3 <- umap_density_panel("Dia T1")

p1 + p2 + p3

In [None]:
# Keep only the two named Tgd subsets.
# NOTE(review): this expression previously ended in a dangling `&` (a parse
# error); if a second filter condition was intended, add it here.
gd_subset  <- subset(gd, annotations_manual %in% c("CD8Tgd1: FGFBP2 GZMH GZMB","CD8Tgd2: TRDV2 SELL COTL1"))


In [None]:
density  <- gd_subset@meta.data  %>% dplyr::select(Sample_ID, Condition)

density$x_umap  <- gd_subset@reductions$umap@cell.embeddings[,1]
density$y_umap  <- gd_subset@reductions$umap@cell.embeddings[,2]

density

In [None]:
options(repr.plot.width = 16, repr.plot.height = 5)

# Filled 2D UMAP density for the cells of one Condition in `density`.
# `panel_title` defaults to the Condition label itself.
gd_density_panel <- function(condition_label, panel_title = condition_label) {
  density %>%
    dplyr::filter(Condition == condition_label) %>%
    ggplot(aes(x = x_umap, y = y_umap)) +
    geom_point(shape = "") +
    geom_density_2d_filled(bins = 40) +
    theme_classic() +
    ggtheme() +
    xlim(-10, 5) +
    #ylim(-12,6) +
    scale_fill_manual(values = colfunc(40)) +
    ggtitle(panel_title) +
    NoLegend()
}

p1 <- gd_density_panel("Dia T0")
p2 <- gd_density_panel("Dia T1")
p3 <- gd_density_panel("Ctrl T0", panel_title = "Ctrl")

p1 + p2 + p3

In [None]:
ggsave("../figures/density_plot_gd.png", width = 30, height = 10, units = "cm")
ggsave("../figures/density_plot_gd.svg", width = 30, height = 10, units = "cm")

In [None]:
all_markers  <- read_csv("../tables/de_genes/240319_cd4_all_markers_without_sex.csv")

In [None]:
all_markers

In [None]:
all_markers  %>% dplyr::filter(source == "scRNAseq_RNA" 
              & dataset == "cd4_l2_unc")    %>% 
group_by(test_type)  %>%
arrange(p_val_adj)  

In [None]:
# Genes of the first five rows per test type among the scRNA-seq markers of the
# cd4_l2_unc dataset (rows are taken in table order; nothing is re-sorted here).
cd4_l2_unc_genes <- all_markers %>%
  dplyr::filter(
    source == "scRNAseq_RNA",
    dataset == "cd4_l2_unc"
  ) %>%
  group_by(test_type) %>%
  slice_head(n = 5) %>%
  pull(gene)


In [None]:
cd4_l2_unc_genes

In [None]:
options(repr.plot.width = 20, repr.plot.height = 25)
DefaultAssay(gd)  <- "RNA"
FeaturePlot(gd, features = cd4_l2_unc_genes, ncol = 5, min.cutoff = 0)

In [None]:
md2  <- get_gene_pct_expression(gd, c("GZMB","CTLA4"))

In [None]:
options(repr.plot.width = 8, repr.plot.height = 5)
# Plot the results
# Percentage of expressing cells per Condition (three conditions, final
# experiments only), one facet per gene, with a p-value label.
md2 %>%
  dplyr::filter(
    Condition %in% c("Ctrl T0", "Dia T0", "Dia T1"),
    Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ) %>%
  ggplot(aes(x = Condition, y = as.numeric(pct_express))) +
  geom_boxplot(outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    mapping = aes(color = factor(Condition)),
    position = position_jitter(0.2),
    size = 2
  ) +
  theme_minimal() +
  facet_wrap(~gene, scales = "free", ncol = 5) +
  ylim(c(0, NA)) +
  ylab("Percentage of expressing cells") +
  theme(legend.title = element_blank()) +
  stat_compare_means(label = "p.format")

In [None]:
# T0 cells of both groups from the four final experiments.
# NOTE(review): this filter uses `Experiment_ID_2`, whereas the other cells
# filter on `Experiment_ID` — confirm this is the intended column.
gd_t0 <- subset(
  gd,
  Condition %in% c("Ctrl T0", "Dia T0") &
    Experiment_ID_2 %in% c("Exp16", "Exp18", "Exp19", "Exp20")
)

# Per-cell label "<Condition> <Sample_ID>".
gd_t0$Condition_ID <- paste(gd_t0$Condition, gd_t0$Sample_ID)

In [None]:
# Aggregate RNA expression of all features per Condition_ID group and return
# the result as a Seurat object.
avgexp <- AggregateExpression(
  gd_t0,
  assays = "RNA",
  features = rownames(gd_t0),
  group.by = "Condition_ID",
  return.seurat = TRUE
)



In [None]:
colnames(avgexp)

In [None]:
avgexp$sample  <- colnames(avgexp)

In [None]:
# Split the `sample` label on spaces into Condition / Time / Sample_ID columns,
# keeping the original `sample` column.
avgexp@meta.data <- separate(
  avgexp@meta.data,
  sample,
  into = c("Condition", "Time", "Sample_ID"),
  sep = " ",
  remove = FALSE
)

In [None]:
Idents(avgexp)  <- avgexp$Condition

In [None]:
mrk  <- FindAllMarkers(avgexp, only.pos = T)

In [None]:
mrk

In [None]:
md3  <- get_gene_pct_expression(gd, mrk$gene)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 12)
# Plot the results
# Percentage of expressing cells for the marker genes, T0 conditions only
# (final experiments), one facet per gene, with a p-value label.
md3 %>%
  dplyr::filter(
    Condition %in% c("Ctrl T0", "Dia T0"),
    Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ) %>%
  ggplot(aes(x = Condition, y = as.numeric(pct_express))) +
  geom_boxplot(outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    mapping = aes(color = factor(Condition)),
    position = position_jitter(0.2),
    size = 2
  ) +
  theme_minimal() +
  facet_wrap(~gene, scales = "free", ncol = 5) +
  ylim(c(0, NA)) +
  ylab("Percentage of expressing cells") +
  theme(legend.title = element_blank()) +
  stat_compare_means(label = "p.format")

In [None]:
options(repr.plot.width = 10, repr.plot.height = 12)
# Plot the results
# Percentage of expressing cells for the marker genes across all three
# conditions (final experiments), one facet per gene, with a p-value label.
md3 %>%
  dplyr::filter(
    Condition %in% c("Ctrl T0", "Dia T0", "Dia T1"),
    Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ) %>%
  ggplot(aes(x = Condition, y = as.numeric(pct_express))) +
  geom_boxplot(outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(
    mapping = aes(color = factor(Condition)),
    position = position_jitter(0.2),
    size = 2
  ) +
  theme_minimal() +
  facet_wrap(~gene, scales = "free", ncol = 5) +
  ylim(c(0, NA)) +
  ylab("Percentage of expressing cells") +
  theme(legend.title = element_blank()) +
  stat_compare_means(label = "p.format")


In [None]:
options(repr.plot.width = 20, repr.plot.height = 12)
DefaultAssay(gd)  <- "RNA"
FeaturePlot(gd, features = mrk$gene, ncol = 5, min.cutoff = 0)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(gd, label = T)

In [None]:
gd  <- subset(gd, seurat_clusters %in% c(2,3))

In [None]:
DefaultAssay(gd)  <- "RNA"

In [None]:
FeaturePlot(gd, features = "CTLA4", ncol = 5, min.cutoff = 0)

In [None]:
scCustomize::Plot_Density_Custom(gd, features = "CTLA4", custom_palette = c("lightblue","khaki1","red"))

In [None]:
FeaturePlot(gd, features = "IL19", ncol = 5, min.cutoff = 4)

In [None]:
FeaturePlot(gd, features = "NCAM1", min.cutoff = 0, ncol = 4)

In [None]:
FeaturePlot(gd, features = "GZMK", min.cutoff = 0, ncol = 4)

In [None]:
FeaturePlot(gd, features = "GZMB", min.cutoff = 0, ncol = 4)

In [None]:
FeaturePlot(gd, features = "GZMK", min.cutoff = 0, ncol = 4)

In [None]:
# NOTE(review): `features = ""` names no gene — presumably a leftover
# placeholder; fill in the intended feature before running this cell.
FeaturePlot(gd, features = "", min.cutoff = 0)

In [None]:
# NOTE(review): "IL13A" does not look like a standard gene symbol
# (IL13 / IL13RA1 / IL13RA2?) — confirm the intended feature name.
FeaturePlot(gd, features = "IL13A", min.cutoff = 0)

In [None]:
# All RNA features whose name starts with "IL". Feature names are read from the
# assay object instead of the `@counts` slot, so this does not depend on the
# internal slot layout of the assay class.
# Note: the prefix match also picks up non-interleukin genes such as ILK or ILF2.
genes  <- grep(pattern = "^IL", x = rownames(gd[["RNA"]]), value = TRUE)

In [None]:
genes

In [None]:
for(i in genes){
    print(FeaturePlot(gd, features = i, min.cutoff = 0))
}

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
FeaturePlot(gd, features = "GZMH", min.cutoff = 0)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
FeaturePlot(gd, features = "GZMB", min.cutoff = 0)

In [None]:
Idents(gd)  <- gd$annotations_manual

In [None]:
mrk  <- FindAllMarkers(gd, only.pos = T)

In [None]:
mrk  %>% group_by(cluster)  %>% slice_head(n = 5)

In [None]:
mrk  %>% dplyr::filter(cluster == "CD8Tgd2: TRDV2 SELL COTL1") 

In [None]:
# Average RNA expression per manual annotation for a hand-picked gene panel
# (the panel is upper-cased and reversed before being passed on).
avgexp <- AverageExpression(
  gd,
  features = rev(str_to_upper(c(
    "LTB", "CEBPD", "CXCR6", "DUSP1", "NFKBIA", "CTLA4", "HAVCR2", "PDCD1", "PDE4D",
    "TRGC1", "CX3CR1", "GZMH", "GZMB", "GZMK", "GNLY", "CCL4", "ITGB2", "TENT5C",
    "SELL", "CXCR4", "IFI44L", "IFI6", "ISG15", "MX1", "IL7R", "IL1R", "KIT", "IL23R", "TRDC"
  ))),
  return.seurat = FALSE,
  group.by = "annotations_manual",
  assays = "RNA"
)

avgexp

library(pheatmap)

In [None]:
DimPlot(gd, group.by = "annotations_manual")

In [None]:
# Average RNA expression per manual annotation for the heatmap gene panel
# (the panel is upper-cased and reversed before being passed on).
avgexp <- AverageExpression(
  gd,
  features = rev(str_to_upper(c(
    "SELL", "TRDC", "TRGC1", "GNLY", "CCL4", "CX3CR1", "GZMB", "HAVCR2", "CTLA4",
    "GZMK", "IL7R", "IL23R", "LTB", "KIT", "CXCR6", "CEBPD",
    "NFKBIA", "DUSP1", "TENT5C", "CXCR4", "PDE4D", "MX1", "ISG15", "IFI44L"
  ))),
  return.seurat = FALSE,
  group.by = "annotations_manual",
  assays = "RNA"
)

In [None]:
options(repr.plot.width = 9, repr.plot.height = 4)
# Heatmap of the averaged expression: annotation groups as rows in the fixed
# order 3,4,1,2,5,6, genes as columns, values scaled per column, no clustering.
pheatmap(
  t(avgexp$RNA)[c(3, 4, 1, 2, 5, 6), ],
  main = "",
  scale = "column",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  width = 9,
  height = 3.3,
  fontsize = 9
)

In [None]:
options(repr.plot.width = 9, repr.plot.height = 4)
# Same heatmap as above, written to a PDF file via `filename`.
pheatmap(
  t(avgexp$RNA)[c(3, 4, 1, 2, 5, 6), ],
  main = "",
  scale = "column",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  width = 9,
  height = 3.3,
  filename = "../figures/heatmaps/heatmap_gd.pdf",
  fontsize = 9
)

In [None]:
cd8_prolif  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L3/cd8_l3_prolif.rds")

In [None]:
DimPlot(cd8_prolif)

In [None]:
FeaturePlot(cd8_prolif, features = c("TRGV9","CD3D","TRAC","TRDC"))

In [None]:
FeaturePlot(cd8_prolif, features = c("TRGC1","TRDV2","NCAM1","FCER1G"))

# TCR gd clones from MIXCR

We will now load and analyze the TCR repertoire data of Tgd cells, which MiXCR recovered from the gene expression reads.

In [None]:
paths_clones  <- paste0(list.dirs("../data/mixcr", recursive = F), "/clones.tsv")

In [None]:
donor_ids  <- list.dirs("../data/mixcr", recursive = F, full.names = F)

In [None]:
paths_clones

In [None]:
md  <- cd8_l1_full_filt@meta.data

In [None]:
md$raw_barcode  <- substr(rownames(md),1,16)

In [None]:
md$barcode  <- rownames(md)

Prepare a function to read and process the TCRG and TCRD data.

In [None]:
# Read one donor's MiXCR clone table and attach the TRG/TRD CDR3 sequences to
# that donor's cell metadata.
#
# Uses the globals `paths_clones` and `donor_ids` (indexed in parallel by `i`)
# and `md` (cell metadata with `Patient_ID` and `raw_barcode` columns).
#
# @param i Index of the donor in `paths_clones` / `donor_ids`.
# @return The metadata rows of donor `i` with one `cdr3_<chain>` column added
#   per chain found (TRG and/or TRD); cells without a clone get NA.
read_and_prep_trgd  <- function(i){
    # read mixcr clone output file
    clone  <- read_tsv(file = paths_clones[i])

    # filter the metadata of the currently processed donor; drop CDR3 columns
    # left over from an earlier run so that re-running stays idempotent
    md_filt  <- md  %>%
        dplyr::filter(Patient_ID == donor_ids[i])  %>%
        dplyr::select(-any_of(c("cdr3_TRG", "cdr3_TRD")))

    # extract chain information (first three characters of the V hit)
    clone$chain  <- substr(clone$allVHitsWithScore,1,3)

    # keep TRG/TRD chains, one clone per cell and chain (highest unique
    # molecule count, ties broken by row order), then one column per chain
    clone_filt  <- clone  %>%
        dplyr::filter(chain %in% c("TRG","TRD"))  %>%
        group_by(tagValueCELL, chain)  %>%
        slice_max(n = 1, order_by = uniqueMoleculeCount, with_ties = FALSE)  %>%
        ungroup()  %>%
        dplyr::select(raw_barcode = tagValueCELL, aaSeqCDR3, chain)  %>%
        pivot_wider(names_from = chain, names_prefix = "cdr3_", values_from = aaSeqCDR3)

    # join to metadata on the raw cell barcode only (explicit key: no other
    # shared column can silently become part of the join)
    md_with_trg  <- md_filt  %>% left_join(clone_filt, by = "raw_barcode")
    md_with_trg
}

Apply the function to every MiXCR clone file.

In [None]:
# One metadata table per donor; seq_along() yields an empty sequence (instead
# of c(1, 0)) when no clone files were found.
md_with_trgd  <- map(seq_along(paths_clones), .f = read_and_prep_trgd)

In [None]:
md_with_trgd  <- bind_rows(md_with_trgd)

In [None]:
md_with_trgd

Fix rownames.

In [None]:
rownames(md_with_trgd)  <- md_with_trgd$barcode

In [None]:
new_md  <- md  %>% dplyr::select(barcode)  %>% left_join(md_with_trgd)

In [None]:
all.equal(new_md$barcode, colnames(cd8_l1_full_filt))

In [None]:
rownames(new_md)  <- colnames(cd8_l1_full_filt)

In [None]:
cd8_l1_full_filt@meta.data  <- new_md

In [None]:
cd8_l1_full_filt$has_trg  <- ifelse(is.na(cd8_l1_full_filt$cdr3_TRG), "No","Yes")
cd8_l1_full_filt$has_trd  <- ifelse(is.na(cd8_l1_full_filt$cdr3_TRD), "No","Yes")


In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)
DimPlot(cd8_l1_full_filt, group.by = "has_trg", order = "Yes", cols = c("grey88","darkred"))

In [None]:
DimPlot(cd8_l1_full_filt, group.by = "has_trd", order = "Yes", cols = c("grey88","darkred"))

In [None]:
# Fraction of cells per annotation with a TRG CDR3, a TRD CDR3, or both,
# sorted by the TRG fraction (highest first).
cd8_l1_full_filt@meta.data %>%
  mutate(
    cdr3_TRG = if_else(is.na(cdr3_TRG), 0, 1),
    cdr3_TRD = if_else(is.na(cdr3_TRD), 0, 1),
    bothGD = if_else(cdr3_TRG == 1 & cdr3_TRD == 1, 1, 0)
  ) %>%
  group_by(annotations_l2) %>%
  summarise(
    mean_trg = mean(cdr3_TRG),
    mean_bothGD = mean(bothGD),
    mean_trd = mean(cdr3_TRD)
  ) %>%
  arrange(desc(mean_trg))