# Part 8: Analysis of Treg cells

In this document, we focus on the Treg cluster subset from the main CD4 dataset. To reproduce the analysis, please download the Treg Seurat object available at Zenodo: [https://doi.org/10.5281/zenodo.14222418](https://doi.org/10.5281/zenodo.14222418)

In [None]:
source("diabetes_analysis_v07.R")

In [None]:
plan("multisession")

We will load the datasets of all CD4 cells and Treg cells.

In [None]:
### CD4 All
cd4_l2  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L2/cd4_subcluster.rds")

In [None]:
## CD4 Treg
treg  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L3/cd4_l3_treg.rds")

We will add the colors and metadata for future use. 

In [None]:
# Colours for the Treg subcluster annotations: entries 2, 4, 5 and 1 (in that
# order) of the six-colour candidate palette.
treg@misc$cols_annotations <- c(
  "#e56381ff", "#fbca93ff", "#a93d60ff",
  "#ffbdcbff", "#f9dd60ff", "#66033cff"
)[c(2, 4, 5, 1)]

# Short dataset label stored alongside the object.
treg@misc$dataset_name <- "treg"

# Unique combinations of the sample-level metadata columns, taken from the
# full CD4 object.
treg@misc$all_md <- cd4_l2@meta.data %>%
  dplyr::select(
    Sample_ID, Condition,
    Disease,
    Sex, Age, Age_group, Patient_ID,
    Time, Experiment_ID
  ) %>%
  unique()

## Projection on L2 CD4 subcluster

Here, we will create a visualization showing the Treg subclusters on the whole CD4 dataset. 

In [None]:
# Barcode -> Treg subcluster lookup; the manual annotation column of the Treg
# object is renamed to `treg_annotations`.
md_to_join <- dplyr::select(
  treg@meta.data,
  barcode,
  treg_annotations = annotations_manual
)

In [None]:
table(md_to_join$barcode %in% cd4_l2$barcode)

In [None]:
head(colnames(treg))

In [None]:
# Attach the Treg subcluster labels to the CD4 metadata. The join key is given
# explicitly so that any other column present in both tables cannot silently
# become part of the key. Cells without a match get NA.
md_to_join <- cd4_l2@meta.data %>%
  left_join(md_to_join, by = "barcode")

# A duplicated barcode in the lookup would add rows and misalign the metadata
# with the cells, so fail with a clear message before restoring the row names
# (left_join() does not keep them).
stopifnot(
  "Joined metadata must have exactly one row per cell" =
    nrow(md_to_join) == ncol(cd4_l2)
)
rownames(md_to_join) <- colnames(cd4_l2)

cd4_l2@meta.data <- md_to_join

In [None]:
cd4_l2$treg_annotations  %>% table

In [None]:
# Notebook plot size.
options(repr.plot.width = 10, repr.plot.height = 7.5)

# Embedding of all CD4 cells, coloured by the Treg subcluster label that was
# joined onto the CD4 metadata.
DimPlot(
  cd4_l2,
  group.by = "treg_annotations",
  cols = treg@misc$cols_annotations
)

## Percentage of expressing cells violin plots

In this section we will calculate the percentage of cells with non-zero expression of classical Treg markers. 

We will start by creating an annotation column containing L2 annotations for non-Treg cells and L3 annotations for Treg cells.

In [None]:
# Combined annotation: cells labelled "Treg" take their Treg subcluster
# label, every other cell keeps its `annotations_manual` label.
cd4_l2@meta.data$annotations_treg_and_all <- with(
  cd4_l2@meta.data,
  if_else(
    annotations_manual == "Treg",
    treg_annotations,
    annotations_manual
  )
)

Next, we will calculate the percentage of expressing cells in each subset (L2 or L3 annotations) in each sample. 

In [None]:
#' Violin plot of the per-sample fraction of cells expressing a gene
#'
#' For every sample/group combination, computes the fraction of cells with a
#' non-zero value of `gene` in the `counts` slot of the RNA assay, and draws
#' one jittered point per sample on top of one violin per group.
#' Sample/group combinations that have no cells are filled in as 0.
#'
#' @param seurat_object Seurat object with an `RNA` assay exposing `@counts`.
#' @param gene Name of a single feature of the RNA assay.
#' @param group.by Name of the metadata column defining the x-axis groups.
#' @param sample.col Name of the metadata column identifying the sample.
#' @return A ggplot object titled with `gene`.
pct_expressing_boxplot <- function(seurat_object, gene, group.by = "annotations_l2", sample.col = "sample") {
  md <- seurat_object@meta.data

  # Validate inputs up front: a missing gene or column would otherwise only
  # surface as an obscure error while building the data frame.
  if (length(gene) != 1) {
    stop("`gene` must be a single feature name.", call. = FALSE)
  }
  rn <- match(gene, rownames(seurat_object@assays$RNA))
  if (is.na(rn)) {
    stop("Gene '", gene, "' was not found in the RNA assay.", call. = FALSE)
  }
  missing_cols <- setdiff(c(group.by, sample.col), colnames(md))
  if (length(missing_cols) > 0) {
    stop(
      "Metadata column(s) not found: ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Font sizes used by this plot.
  ggtheme <- function() {
    theme(
      axis.text = element_text(size = 20),
      axis.title = element_text(size = 20),
      text = element_text(size = 20, colour = "black"),
      legend.text = element_text(size = 20),
      legend.key.size = unit(10, units = "points")
    )
  }

  df <- data.frame(
    grouping_var = md[[group.by]],
    value = seurat_object@assays$RNA@counts[rn, ],
    sample = md[[sample.col]]
  ) %>%
    mutate(expressing = if_else(value > 0, 1, 0)) %>%
    dplyr::select(-value) %>%
    group_by(sample, grouping_var) %>%
    summarise(mean_expression = mean(expressing), .groups = "drop") %>%
    # The wide/long round trip adds an explicit 0 for every sample/group
    # combination that has no cells.
    pivot_wider(names_from = sample, values_from = mean_expression, values_fill = 0) %>%
    pivot_longer(!grouping_var, names_to = "sample", values_to = "expressing")

  ggplot(data = df, aes(x = grouping_var, y = expressing)) +
    geom_violin(aes(fill = grouping_var), alpha = 0.3, scale = "width") +
    # Kept from the original; with dotsize = 0 this layer presumably draws
    # nothing visible. TODO confirm and remove.
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_jitter(width = 0.1, height = 0.0, size = 2, aes(color = grouping_var)) +
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
    ggtheme() +
    ggtitle(gene) +
    ylab("Pct expressing cells") +
    xlab("") +
    NoLegend()
}


In [None]:
options(repr.plot.width = 7, repr.plot.height = 9)

# FOXP3-expressing fraction per sample across the combined L2/L3 annotation.
pct_expressing_boxplot(
  seurat_object = cd4_l2,
  group.by = "annotations_treg_and_all",
  gene = "FOXP3",
  sample.col = "Sample_ID"
)

Visualize genes of interest in Treg subpopulations and naive T cells.

In [None]:
cd4_l2$annotations_manual  %>% table

In [None]:
treg_and_naive  <- subset(cd4_l2, annotations_manual %in% c("Treg","Naive"))

In [None]:
options(repr.plot.width = 16, repr.plot.height = 7)

# One panel per marker gene, combined left to right into a single row of
# five plots.
Reduce(
  `+`,
  lapply(
    c("CTLA4", "FOXP3", "IL10RA", "IL2RA", "TIGIT"),
    function(marker) {
      pct_expressing_boxplot(
        seurat_object = treg_and_naive,
        group.by = "annotations_treg_and_all",
        gene = marker,
        sample.col = "Sample_ID"
      )
    }
  )
) +
  plot_layout(ncol = 5)

In [None]:
# No `plot` argument is given, so ggsave() writes the last plot;
# `create.dir = TRUE` creates the output folder when it does not exist yet.
ggsave(filename = "../figures/subset_characterization/treg_genes_violin.svg", 
      width = 50, height = 18, units = "cm", create.dir = TRUE)

## Frequency plot figure

In this part we will visualize the frequency of Treg subpopulations. 

We will only be using cells from the final experiment. 

In [None]:
# Keep the three conditions of interest from experiments Exp16/18/19/20.
treg_subset <- subset(
  treg,
  Condition %in% c("Ctrl T0", "Dia T0", "Dia T1") &
    Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
)

In [None]:
df4  <- create_df4(treg_subset)

In [None]:
df4

In [None]:
 # Per-sample frequency (in %) of each Treg subcluster, one facet per
 # subcluster, with a test comparing the first two conditions on the x axis.
 p5 <- df4 %>%
   filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
   ggplot(aes(x = Condition, y = freq * 100)) +
   geom_violin(alpha = 0.3, aes(fill = Condition)) +
   stat_summary(
     fun = "median",
     geom = "crossbar",
     width = 0.75,
     color = "grey30"
   ) +
   scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
   scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
   geom_beeswarm(
     size = 3, cex = 3,
     color = "black", method = "center",
     aes(fill = Condition), shape = 21
   ) +
   facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
   ylab("Frequency") +
   xlab("Condition") +
   theme_classic() +
   ggpubr::stat_compare_means(
     label.x = 1.5,
     size = 7, vjust = -1, label = "p.format", comparisons = list(c(1, 2))
   ) +
   # Applied once, after theme_classic(); the original added it twice.
   ggtheme() +
   scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
   theme(strip.background = element_blank(), panel.grid = element_blank()) +
   theme(
     axis.text = element_text(color = "black"),
     axis.text.x = element_text(angle = 90),
     # `linewidth` replaces the deprecated `size` argument of element_line().
     axis.line = element_line(color = "black", linewidth = 0.5),
     axis.ticks.x = element_blank()
   ) +
   NoLegend() +
   ggtitle("Final data all groups")

 # Number of distinct subclusters (not used by the plot above).
 ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

In [None]:
ggsave("../figures/subset_characterization/treg_subsets_in_condition.svg",
       width = 25, height = 12, units = "cm")

In [None]:
df4

In [None]:
    # Same frequency plot without patient 116, with a paired test between the
    # second and third condition on the x axis.
    p5 <- df4 %>%
      filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
      dplyr::filter(Patient_ID != "116") %>%
      # A paired test matches observations by their row order within each
      # group, so order the rows by patient first.
      # NOTE(review): assumes each remaining patient has exactly one row per
      # compared condition and subcluster - confirm against create_df4().
      arrange(Patient_ID) %>%
      ggplot(aes(x = Condition, y = freq * 100)) +
      geom_violin(alpha = 0.3, aes(fill = Condition)) +
      stat_summary(
        fun = "median",
        geom = "crossbar",
        width = 0.75,
        color = "grey30"
      ) +
      scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
      scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
      geom_beeswarm(
        size = 3, cex = 3,
        color = "black", method = "center",
        aes(fill = Condition), shape = 21
      ) +
      facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
      ylab("Frequency") +
      xlab("Condition") +
      theme_classic() +
      ggpubr::stat_compare_means(
        label.x = 1.5,
        size = 7, vjust = -1, label = "p.format",
        comparisons = list(c(2, 3)), paired = TRUE
      ) +
      # Applied once, after theme_classic(); the original added it twice.
      ggtheme() +
      scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
      theme(strip.background = element_blank(), panel.grid = element_blank()) +
      theme(
        axis.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 90),
        # `linewidth` replaces the deprecated `size` argument of element_line().
        axis.line = element_line(color = "black", linewidth = 0.5),
        axis.ticks.x = element_blank()
      ) +
      NoLegend() +
      ggtitle("Final data all groups")

    # Number of distinct subclusters (not used by the plot above).
    ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

## Treg density

We will visualize the density of Treg cells coming from different conditions on the UMAP plot. 

In [None]:
# Per-cell table with sample, condition and the two UMAP coordinates.
density <- treg_subset@meta.data %>%
  dplyr::select(Sample_ID, Condition)

density$x_umap <- treg_subset@reductions$umap@cell.embeddings[, 1]
density$y_umap <- treg_subset@reductions$umap@cell.embeddings[, 2]

library(ggplot2)
library(dplyr)
library(viridis)

# Colour ramp for the filled density contours, from white (lowest) to red
# (highest).
colfunc <- colorRampPalette(
  c("white", "grey85", "grey75", "dodgerblue", "green", "yellow", "red")
)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)

# Filled 2D density (30 bins) of Dia T0 cells on fixed UMAP axis limits.
p1 <- density %>%
  filter(Condition == "Dia T0") %>%
  ggplot(aes(x = x_umap, y = y_umap)) +
  geom_point(shape = "") +
  geom_density_2d_filled(bins = 30) +
  theme_classic() +
  ggtheme() +
  xlim(-10, 10) +
  ylim(-8, 8) +
  scale_fill_manual(values = colfunc(30)) +
  ggtitle("Dia T0") +
  NoLegend()

# Same plot for Ctrl T0 cells.
p2 <- density %>%
  filter(Condition == "Ctrl T0") %>%
  ggplot(aes(x = x_umap, y = y_umap)) +
  geom_point(shape = "") +
  geom_density_2d_filled(bins = 30) +
  theme_classic() +
  ggtheme() +
  xlim(-10, 10) +
  ylim(-8, 8) +
  scale_fill_manual(values = colfunc(30)) +
  ggtitle("Ctrl T0") +
  NoLegend()

# Show both panels side by side.
p1 + p2

In [None]:
options(repr.plot.width = 14, repr.plot.height = 5)

# Filled 2D density (30 bins) of Dia T0 cells on fixed UMAP axis limits.
p1 <- density %>%
  filter(Condition == "Dia T0") %>%
  ggplot(aes(x = x_umap, y = y_umap)) +
  geom_point(shape = "") +
  geom_density_2d_filled(bins = 30) +
  theme_classic() +
  ggtheme() +
  xlim(-10, 10) +
  ylim(-8, 8) +
  scale_fill_manual(values = colfunc(30)) +
  ggtitle("Dia T0") +
  NoLegend()

In [None]:
# Filled 2D density (30 bins) of Ctrl T0 cells on fixed UMAP axis limits.
p2 <- density %>%
  filter(Condition == "Ctrl T0") %>%
  ggplot(aes(x = x_umap, y = y_umap)) +
  geom_point(shape = "") +
  geom_density_2d_filled(bins = 30) +
  theme_classic() +
  ggtheme() +
  xlim(-10, 10) +
  ylim(-8, 8) +
  scale_fill_manual(values = colfunc(30)) +
  ggtitle("Ctrl T0") +
  NoLegend()

In [None]:
# Filled 2D density (30 bins) of Dia T1 cells on fixed UMAP axis limits.
p3 <- density %>%
  filter(Condition == "Dia T1") %>%
  ggplot(aes(x = x_umap, y = y_umap)) +
  geom_point(shape = "") +
  geom_density_2d_filled(bins = 30) +
  theme_classic() +
  ggtheme() +
  xlim(-10, 10) +
  ylim(-8, 8) +
  scale_fill_manual(values = colfunc(30)) +
  ggtitle("Dia T1") +
  NoLegend()

In [None]:
options(repr.plot.width = 14, repr.plot.height = 5)
p1 + p2 + p3

We will create a manual gate that will be used for quantification of Treg1 cells. 

In [None]:
options(repr.plot.width = 14, repr.plot.height = 5)

# Draw the same rectangular gate (x between 2.8 and 6.8, y between -5 and
# -1.5) on each of the three density panels and show them side by side.
local({
  gate_lines <- list(
    geom_vline(xintercept = 6.8),
    geom_vline(xintercept = 2.8),
    geom_hline(yintercept = -1.5),
    geom_hline(yintercept = -5)
  )
  (p1 + gate_lines) + (p2 + gate_lines) + (p3 + gate_lines)
})

In [None]:
# Save the last plot as PNG and SVG. `create.dir = TRUE` (already used by
# other ggsave() calls in this notebook) creates "../figures/treg" if it does
# not exist yet, instead of failing or prompting.
ggsave("../figures/treg/density_plot.png",
  width = 20, height = 10, units = "cm", create.dir = TRUE
)
ggsave("../figures/treg/density_plot.svg",
  width = 20, height = 10, units = "cm", create.dir = TRUE
)

In [None]:
treg_subset$umap1  <- treg_subset@reductions$umap@cell.embeddings[,1]
treg_subset$umap2  <- treg_subset@reductions$umap@cell.embeddings[,2]


In [None]:
# Flag every cell as inside ("gate") or outside ("no_gate") the rectangle
# 2.8 < umap1 < 6.8 and -5 < umap2 < -1.5 (strict bounds), i.e. the same
# limits as the gate lines drawn on the density plots.
treg_subset$cells_in_gate  <- ifelse(treg_subset$umap1 < 6.8 & treg_subset$umap1 > 2.8 &
                                     treg_subset$umap2 < -1.5 & treg_subset$umap2 > -5,
                                     "gate","no_gate")

In [None]:
DimPlot(treg_subset, group.by = "cells_in_gate")

In [None]:
df4

Quantification of cells in gate. 

In [None]:
seurat_meta_data <- treg_subset@meta.data

# Count cells inside and outside the gate for every sample. complete() adds an
# explicit n = 0 row for any sample that has no cells in one of the two
# categories, whichever category that is. nesting() restricts the expansion to
# samples that are actually present (no unused factor levels).
df4 <- seurat_meta_data %>%
  dplyr::count(Sample_ID, cells_in_gate, name = "n") %>%
  tidyr::complete(
    tidyr::nesting(Sample_ID),
    cells_in_gate = c("gate", "no_gate"),
    fill = list(n = 0L)
  ) %>%
  group_by(Sample_ID) %>%
  mutate(freq = n / sum(n)) %>%
  ungroup()

# Control - all sums should be one
# df4 %>% group_by(Sample_ID) %>% summarise(sum = sum(freq))

# The counting step dropped the non-grouping variables; join them back.
md_to_join <- seurat_meta_data %>%
  dplyr::select(Sample_ID, Condition, Disease, Patient_ID) %>%
  unique()
df4 <- left_join(df4, md_to_join, by = "Sample_ID")

In [None]:
options(repr.plot.width = 7.5, repr.plot.height = 4)

# Fraction of each sample's cells inside / outside the gate, per condition,
# with a test comparing the first two conditions on the x axis.
df4 %>%
  ggplot(aes(x = Condition, y = freq)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(
    size = 3, cex = 3,
    color = "black", method = "center",
    aes(fill = Condition), shape = 21
  ) +
  facet_wrap(~cells_in_gate, scales = "free") +
  ylab("Frequency") +
  xlab("Condition") +
  # Start the y axis just below zero.
  ylim(c(-0.001, NA)) +
  theme_classic() +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  stat_compare_means(comparisons = list(c(1, 2))) +
  ggtheme() +
  NoLegend()
# ggsave("../figures/subset_characterization/treg_gate.svg",
#        width = 14, height = 10, units = "cm")

In [None]:
# Gate frequencies without patient 116, with a paired test between the second
# and third condition on the x axis.
df4 %>%
  dplyr::filter(Patient_ID != "116") %>%
  # A paired test matches observations by their row order within each group.
  # Rows are not ordered by patient at this point, so sort them first.
  # NOTE(review): assumes each remaining patient has exactly one row per
  # compared condition and gate category - confirm.
  arrange(Patient_ID) %>%
  ggplot(aes(x = Condition, y = freq)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(
    size = 3, cex = 3,
    color = "black", method = "center",
    aes(fill = Condition), shape = 21
  ) +
  facet_wrap(~cells_in_gate, scales = "free") +
  ylab("Frequency") +
  xlab("Condition") +
  # Start the y axis just below zero.
  ylim(c(-0.001, NA)) +
  theme_classic() +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  stat_compare_means(comparisons = list(c(2, 3)), paired = TRUE) +
  ggtheme()

In [None]:
ggsave(filename = "../figures/treg/treg_density_quantification.svg", width = 3, height = 5)

# GSEA Resting vs Activated Treg

We will estimate the activation score of Treg cells in the dataset using a gene set from the following study: GSE15659_RESTING_VS_ACTIVATED_TREG_DN. The gene set will be used to calculate module score in the Treg Seurat object. 

In [None]:
# GSE15659_RESTING_VS_ACTIVATED_TREG_DN
#
# NOTE(review): two entries below are empty strings; presumably two symbols
# were lost when the list was pasted. Confirm against the original gene set.
genes  <- c('LMNTD2-AS1', 'POLR2F', 'POLR3F', 'POP7', 'PPHLN1', 'PPM1B', 'PPM1K', 'PPP2CA', 'PPP2R1A', 
'PPP2R5A', 'DESI2', 'PPY', 'PRAM1', 'PRDX3', 'PRDX4', 'PRG3', 'PRKAG3', 'PRKX', '', 'PROCR', 'PSEN1', 
'PSMA2', 'PSMG2', 'PSPN', 'PSRC1', 'PTK2B', 'HACD4', 'PTPN23', 'NECTIN4', 'PYGL', 'PYHIN1', 'RAB2B', 
'RAB9A', 'RAMP1', 'RAN', 'RANBP9', 'RANGRF', 'RAP2B', 'RBCK1', 'RBM41', 'RCOR1', 'RD3', 'RGS17', 
'RGS4', 'RHOC', 'RHOU', 'RIT1', 'RNF10', 'RNF150', 'RNF167', 'RNF214', 'ROCK1', 'ROS1', 'RPL26L1', 
'RRM1', 'RSPH1', 'RSPH10B2', 'RSPH3', 'RTP1', 'RUSC1', 'S100P', 'S1PR2', 'SAGE1', 'SAP30BP', 
'SARS2', 'SCN8A', 'SCO2', 'SDC3', 'SEC24D', 'SEC61B', 'SEPHS2', 'SEPTIN3', 'SF3B1', '', 'SGO2', 
'SH2D1A', 'SHISA6', 'SLC16A4', 'SLC22A18', 'SLC24A5', 'SLC25A3', 'SLC25A46', 'SLC26A6', 'SLC35F3', 
'SLC39A1', 'SLC43A3', 'SLC44A3', 'SLC4A9', 'SLC6A7', 'SLC9A6', 'SLC9A8', 'SLFNL1', 'SMAP2', 'SNAP91', 
'SNORA65', 'SNX3', 'SOX11', 'SOX2-OT', 'SPATA2', 'SPATA22', 'SPATA5L1', 'SPATC1', 'SPATS2L', 'SRL', 
'DENND2B', 'ST7-AS1', 'STARD4', 'STK25', 'STK32A', 'STK36', 'STX10', 'SUMO4', 'SUSD1', 'SYN2', 
'TBC1D1', 'TBC1D4', 'TDRD7', 'TEX12', 'TFF2', 'THAP1', 'THRB', 'TIFAB', 'TIPRL', 'TK1', 'TMBIM1', 
'TMEM126A', 'TMEM139', 'TMEM208', 'TMEM70', 'EMC4', 'TNF', 'TNFAIP3', 'TNFSF12', 'TOMM22', 'TOP1', 
'TOR1AIP1', 'TP53INP2', 'TPI1', 'TPMT', 'TRABD', 'TRIM16', 'TRIM55', 'TRIML1', 'TRIP12', 'TRIP6', 
'TRPM6', 'TSNAX', 'EIPR1', 'GFUS', 'TTC33', 'EMC2', 'TTC7A', 'TTL', 'TTTY13', 'TULP2', 'TUSC2', 
'TYK2', 'U2AF1', 'UBA3', 'UBE2D1', 'UBE2U', 'UBFD1', 'UBL7', 'UBR5', 'UGGT1', 'UGT2B17', 'UNC13D', 
'UTS2B', 'VAMP3', 'VCPIP1', 'VPS28', 'WARS1', 'WDHD1', 'DNAAF10', 'CCN6', 'GET1', 'XAF1', 'YY1', 
'ZAN', 'ZBED6', 'ZC3H13', 'ZCCHC9', 'ZDHHC8BP', 'ZFAND5', 'ZFC3H1', 'ZMIZ2', 'ZNF28', 'ZNF287', 'ZNF324B', 
'ZNF34', 'ZNF385D', 'ZNF394', 'ZNF414', 'ZNF596', 'ZNF618', 'ZNF663P', 'ZNF678', 'POLR1H', 'ZNRF2', 'ZPBP2')

# Drop the empty placeholders so only real symbols are passed on.
genes <- genes[nzchar(genes)]

# Score every cell for the gene set; the score is stored in the metadata
# column "GSE15659_RESTING_VS_ACTIVATED_TREG_DN_1" used further below.
treg <- AddModuleScore(
  object = treg,
  features = list(genes),
  search = FALSE,
  ctrl = 50,
  nbin = 50,
  assay = "RNA",
  name = 'GSE15659_RESTING_VS_ACTIVATED_TREG_DN_')

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4)

# Module score on the embedding, blue - white - red colour scale, rasterised.
FeaturePlot(
  treg,
  features = "GSE15659_RESTING_VS_ACTIVATED_TREG_DN_1",
  cols = c("#2d95ffff", "white", "#ff4140ff"),
  pt.size = 3,
  raster = TRUE
)



# Trajectory analysis

To analyze the trajectory of Treg cells, we will use the [Slingshot package](https://www.bioconductor.org/packages/release/bioc/vignettes/slingshot/inst/doc/vignette.html).

In [None]:
library(grDevices)
library(RColorBrewer)
library(slingshot)

Because Slingshot takes a SingleCellExperiment (SCE) object as input, we first need to convert our Seurat object. We found that the integrated assay causes problems during conversion, so we remove it and then add the PCA and UMAP coordinates back to the SCE object. 

In [None]:
# Work on the RNA assay from here on.
DefaultAssay(treg) <- "RNA"

# Copy of the object without the "integrated" assay, used only for the
# conversion below.
treg_no_integrated <- treg
treg_no_integrated[["integrated"]] <- NULL

# Use the Seurat clusters as cell identities.
Idents(treg_no_integrated) <- treg_no_integrated$seurat_clusters

sce <- as.SingleCellExperiment(treg_no_integrated)

# Set the reduced dimensions explicitly. Note that PCA is taken from `treg`
# while UMAP is taken from `treg_no_integrated`.
# NOTE(review): presumably both objects hold identical embeddings - confirm.
reducedDim(sce, "PCA", withDimnames=TRUE) <- treg[['pca']]@cell.embeddings
reducedDim(sce, "UMAP", withDimnames=TRUE) <- treg_no_integrated[['umap']]@cell.embeddings

Run the Slingshot algorithm:

In [None]:
sce <- slingshot(sce, clusterLabels = 'seurat_clusters', reducedDim = 'UMAP')

In [None]:
# Reversed 100-step ramp built from the 11-class Spectral palette with its
# sixth colour removed.
colors <- rev(
  colorRampPalette(brewer.pal(11, "Spectral")[-6])(100)
)

# Cut lineage-1 pseudotime into 100 intervals and pick one colour per cell.
plotcol <- colors[cut(sce$slingPseudotime_1, breaks = 100)]

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

# UMAP coloured by pseudotime, with the Slingshot curve(s) drawn on top.
plot(
  reducedDims(sce)$UMAP,
  col = plotcol,
  pch = 16,
  asp = 1
)
lines(SlingshotDataSet(sce), lwd = 2, col = "black")

For visualization purposes we will add the Slingshot pseudotime score back to the Seurat object.

In [None]:
treg$slingPseudotime_1  <- sce$slingPseudotime_1

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4)

# Per-cell pseudotime by condition (Ctrl T0 vs Dia T0, Exp16/18/19/20 only),
# with the default stat_compare_means() test.
VlnPlot(
  subset(
    treg,
    Condition %in% c("Ctrl T0", "Dia T0") &
      Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ),
  group.by = "Condition",
  features = "slingPseudotime_1",
  pt.size = 0
) +
  ggpubr::stat_compare_means()

In [None]:
# Per-cell pseudotime by condition as boxplots (outliers hidden) with the
# median drawn as a crossbar.
treg@meta.data %>%
  filter(
    Condition %in% c("Ctrl T0", "Dia T0", "Dia T1") &
      Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ) %>%
  ggplot(aes(x = Condition, y = slingPseudotime_1)) +
  geom_boxplot(alpha = 0.3, aes(fill = Condition), outlier.shape = NA) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  )

In [None]:
options(repr.plot.width = 4, repr.plot.height = 6)

# Mean pseudotime per patient, condition and experiment (patient 116
# excluded), with a paired test between the second and third condition on the
# x axis.
treg@meta.data %>%
  group_by(Patient_ID, Condition, Experiment_ID) %>%
  dplyr::filter(Patient_ID != "116") %>%
  summarize(mean_Sling = mean(slingPseudotime_1)) %>%
  filter(
    Condition %in% c("Ctrl T0", "Dia T0", "Dia T1") &
      Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ) %>%
  ggplot(aes(x = Condition, y = mean_Sling)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  geom_beeswarm(
    size = 3, aes(fill = Condition), cex = 3,
    shape = 21, color = "black", method = "center"
  ) +
  scale_shape_manual(values = c(21, 22)) +
  ylab("") +
  xlab("") +
  theme_classic() +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = 0.9,
    size = 7, label = "p.format",
    # Spelled out in full; the original `comparison` only worked through
    # partial argument matching.
    comparisons = list(c(2, 3)), paired = TRUE
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    # `linewidth` replaces the deprecated `size` argument of element_line().
    axis.line = element_line(color = "black", linewidth = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4)

# Median pseudotime per patient, condition and experiment, with a test
# comparing the first two conditions on the x axis.
treg@meta.data %>%
  group_by(Patient_ID, Condition, Experiment_ID) %>%
  summarize(median_Sling = median(slingPseudotime_1)) %>%
  filter(
    Condition %in% c("Ctrl T0", "Dia T0", "Dia T1") &
      Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ) %>%
  ggplot(aes(x = Condition, y = median_Sling)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  geom_beeswarm(
    size = 3, aes(fill = Condition), cex = 3,
    shape = 21, color = "black", method = "center"
  ) +
  scale_shape_manual(values = c(21, 22)) +
  ylab("") +
  xlab("") +
  theme_classic() +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = 0.9,
    size = 7, label = "p.format",
    comparisons = list(c(1, 2))
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data")

# Write the last plot to disk, creating the folder if needed.
ggsave(
  "../figures/subset_characterization/treg_pseudotime_in_condition.svg",
  width = 10, create.dir = TRUE, height = 12, units = "cm"
)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4)

# Median pseudotime per patient, condition and experiment; same plot as
# before but with the default fill colours.
treg@meta.data %>%
  group_by(Patient_ID, Condition, Experiment_ID) %>%
  summarize(median_Sling = median(slingPseudotime_1)) %>%
  filter(
    Condition %in% c("Ctrl T0", "Dia T0", "Dia T1") &
      Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ) %>%
  ggplot(aes(x = Condition, y = median_Sling)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  geom_beeswarm(
    size = 3, aes(fill = Condition), cex = 3,
    shape = 21, color = "black", method = "center"
  ) +
  scale_shape_manual(values = c(21, 22)) +
  ylab("") +
  xlab("") +
  theme_classic() +
  # scale_fill_manual(values = c("#1874cdff","#c41515ff"))+
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = 0.9,
    size = 7, label = "p.format",
    comparisons = list(c(1, 2))
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# Fraction of each condition's cells falling into each of 20 pseudotime bins
# (wide format: one row per condition, one column per bin; missing
# combinations are filled with 0).
treg_pseudotime <- treg@meta.data %>%
  mutate(ntile_sling = ntile(slingPseudotime_1, n = 20)) %>%
  group_by(Condition, ntile_sling) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n)) %>%
  dplyr::select(-n) %>%
  pivot_wider(names_from = "ntile_sling", values_from = "freq", values_fill = 0)

# Back to long format. `Condition` is the only identifier column, so every
# other column is a bin; the original `3:ncol(...)` selection skipped column 2
# (the first bin) and left it unpivoted.
treg_pseudotime2 <- treg_pseudotime %>%
  pivot_longer(-Condition, names_to = "ntile_sling", values_to = "freq")

In [None]:
treg_pseudotime2

In [None]:
# Cell counts for every combination of pseudotime bin (rows) and module-score
# bin (columns), 20 bins each.
mtx <- treg@meta.data %>%
  mutate(
    ntile_sling = ntile(slingPseudotime_1, n = 20),
    ntile_score = ntile(GSE15659_RESTING_VS_ACTIVATED_TREG_DN_1, n = 20)
  ) %>%
  ungroup() %>%
  group_by(ntile_sling, ntile_score) %>%
  tally() %>%
  # names_sort = TRUE keeps the score bins in sorted order; by default columns
  # appear in order of first occurrence, and the heatmap below does not
  # reorder columns.
  pivot_wider(
    names_from = ntile_score, values_from = n,
    values_fill = 0, names_sort = TRUE
  ) %>%
  column_to_rownames("ntile_sling") %>%
  as.matrix()

In [None]:
# Heatmap of the bin-by-bin counts, shown in the notebook.
options(repr.plot.width = 6, repr.plot.height = 5)
pheatmap::pheatmap(mtx,
  cluster_rows = FALSE, cluster_cols = FALSE,
  color = colorRampPalette(c("white", "grey95", "indianred2", "darkred"))(50),
  border_color = "white", width = 9, height = 3.3,
  fontsize = 9
)

# Same heatmap written to a PDF. Create the output folder first so that
# writing the file does not fail when the folder is missing.
dir.create("../figures/treg_validation", recursive = TRUE, showWarnings = FALSE)

options(repr.plot.width = 6, repr.plot.height = 5)
pheatmap::pheatmap(mtx,
  cluster_rows = FALSE, cluster_cols = FALSE,
  color = colorRampPalette(c("white", "grey95", "indianred2", "darkred"))(50),
  border_color = "white", width = 6, height = 5,
  fontsize = 9, filename = "../figures/treg_validation/trajectory_correlation_hm.pdf"
)

In [None]:
# Silences ALL warnings for the rest of the session.
# NOTE(review): this also hides warnings from later cells; consider restoring
# the previous value once the noisy step is done.
options(warn = -1)

In [None]:
# Store the 20-bin pseudotime bin of every cell in the metadata.
treg@meta.data$ntile_sling <- ntile(treg@meta.data$slingPseudotime_1, n = 20)

In [None]:
# Aggregate RNA expression of selected marker genes within each pseudotime
# bin; returns a list of matrices, not a Seurat object.
# NOTE(review): the name says "avg" but the call is AggregateExpression() -
# confirm that aggregated rather than averaged values are intended.
avgexp <- AggregateExpression(
  treg,
  group.by = c("ntile_sling"),
  features = c(
    "NELL2", "IL7R", "CCR7", "TCF7", "SELL",
    "CTLA4", "IL10RA", "IL2RA", "ENTPD1",
    "TNFRSF9", "CCR4"
  ),
  assay = "RNA",
  return.seurat = FALSE
)

In [None]:
# Row-scaled heatmap of the marker genes across pseudotime bins, without
# clustering of rows or columns.
pheatmap::pheatmap(
  avgexp$RNA,
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  scale = "row",
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  width = 6,
  height = 5
)

## Trajectories in healthy and Dia

In [None]:
library(tradeSeq)
library(condiments)
theme_set(theme_classic())

In [None]:
umap_coord  <- treg@reductions$umap@cell.embeddings

In [None]:
treg$umap_1 = unname(umap_coord[,1])

In [None]:
treg$umap_2 = unname(umap_coord[,2])

In [None]:
# Per-cell table with condition, sample, experiment, UMAP coordinates
# (renamed Dim1/Dim2) and cluster (renamed cl).
df <- dplyr::select(
  treg@meta.data,
  Condition, Sample_ID, Experiment_ID,
  Dim1 = umap_1, Dim2 = umap_2, cl = seurat_clusters
)

# Restrict to experiments Exp16/18/19/20.
df_filt <- dplyr::filter(
  df,
  Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
)


In [None]:
df

In [None]:
df$Condition2  <- substr(df$Condition, 1,3)

In [None]:
# Imbalance score of the conditions, computed from the 2D embedding
# coordinates of the filtered cells.
scores <- imbalance_score(
  Object = as.matrix(dplyr::select(df_filt, Dim1, Dim2)),
  conditions = df_filt$Condition
)

In [None]:
df_filt$scores <- scores$scores
df_filt$scaled_scores <- scores$scaled_scores

In [None]:
# Embedding coloured by the raw imbalance score.
ggplot(
  df_filt,
  aes(x = Dim1, y = Dim2, col = scores)
) +
  geom_point() +
  scale_color_viridis_c(option = "C")

In [None]:
options(repr.plot.width = 7.5, repr.plot.height = 5)

# Embedding coloured by the scaled imbalance score.
ggplot(
  df_filt,
  aes(x = Dim1, y = Dim2, col = scaled_scores)
) +
  geom_point() +
  scale_color_viridis_c(option = "C")

In [None]:
# The workspace must not be cleared here: the following cells still use
# `sce`, `df`, `df_filt` and `treg`, so `rm(list = ls())` broke a top-to-bottom
# run of the notebook. Only release unused memory instead.
invisible(gc())

In [None]:
pseudotime_test  <- slingPseudotime(sce)

In [None]:
df_filt

In [None]:
df_filt$cells  <- rownames(df_filt)

In [None]:
pseudotime_test

In [None]:
# Pseudotime of every cell along every lineage, annotated with the cell name
# and condition columns taken from `df` by position.
psts <- as.data.frame(slingPseudotime(sce))
psts$cells <- rownames(psts)
psts$conditions <- df$Condition
psts$Condition2 <- df$Condition2

# Keep only the cells of the filtered experiments and reshape to one row per
# cell and lineage.
psts <- psts %>%
  dplyr::filter(cells %in% df_filt$cells) %>%
  pivot_longer(
    starts_with("Lineage"),
    values_to = "pseudotime",
    names_to = "lineages"
  )

In [None]:
psts 

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6)

# Pseudotime density per condition, one facet per lineage.
ggplot(
  psts,
  aes(x = pseudotime, color = conditions, fill = conditions)
) +
  geom_density(alpha = .02) +
  # scale_fill_brewer(type = "qual") +
  facet_wrap(~lineages) +
  theme(legend.position = "bottom") +
  ggtheme() +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff"))

In [None]:
# Save the last plot; `create.dir = TRUE` (already used by other ggsave()
# calls in this notebook) creates the output folder if it is missing.
ggsave("../figures/treg_validation/trajectory_progression.svg",
  width = 20, height = 13, units = "cm", create.dir = TRUE
)

In [None]:
# Add sample information to every pseudotime row. The key is given explicitly
# (cell barcode) so that any other shared column cannot silently become part
# of the join key.
psts2 <- left_join(
  psts,
  treg@meta.data %>% dplyr::select(cells = barcode, Sample_char, Sample_ID),
  by = "cells"
)

In [None]:
# Pseudotime density per sample (sample 454 excluded): outline coloured by
# condition, fill by sample, one facet per lineage.
p <- psts2 %>%
  dplyr::filter(Sample_ID != "454") %>%
  ggplot(
    aes(x = pseudotime, color = conditions, fill = as.factor(Sample_ID))
  ) +
  geom_density(alpha = .02) +
  # scale_fill_brewer(type = "qual") +
  facet_wrap(~lineages) +
  theme(legend.position = "bottom") +
  ggtheme()
#   scale_fill_manual(values = c("#1874cdff",  "#ee6363ff", "#c41515ff","#eeb4b4ff")) +
# scale_color_manual(values = c("dodgerblue3",  "#e54c4cff", "#aa2a2aff","#e88989ff"))
plot(p)

In [None]:
# Same density plot, but with one panel per sample; it is only built (not
# shown) so that the computed density curves can be read back.
p2 <- psts2 %>%
  dplyr::filter(Sample_ID != "454") %>%
  ggplot(
    aes(x = pseudotime, colour = conditions, fill = as.factor(Sample_ID))
  ) +
  geom_density(alpha = 0.02) +
  facet_wrap(vars(Sample_ID))

# NOTE: `p` is reused here and now holds the built plot, not a ggplot object.
p <- ggplot_build(p2)

# Columns of interest in the computed density layer
p$data[[1]]$x
p$data[[1]]$density
p$data[[1]]$fill


In [None]:
options(repr.plot.width = 3, repr.plot.height = 5)

# For every panel/colour combination of the built density plot, keep the row
# with the highest density and compare the x position of that peak between
# conditions.
# NOTE(review): the conditions are recovered from the hex colours of the
# built plot, presumably ggplot2's default palette; this only holds while
# the colour mapping of `p2` stays as it is - confirm.
# NOTE(review): the y axis shows `x` (position of the density peak) but is
# labelled "Frequency" - confirm the label.
p$data[[1]] %>%
  group_by(PANEL, colour) %>%
  slice_max(order_by = density, n = 1) %>%
  mutate(
    colour = recode_factor(
      colour,
      "#00BA38" = "Dia T1",
      "#619CFF" = "Dia T0",
      "#F8766D" = "Ctrl T0"
    )
  ) %>%
  mutate(colour = factor(colour, levels = c("Ctrl T0", "Dia T0", "Dia T1"))) %>%
  ggplot(aes(x = colour, y = x, fill = colour)) +
  geom_violin(alpha = 0.3, aes(fill = colour), scale = "width") +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(
    values = c("#1874cdff",  "#ee6363ff", "#c41515ff","#eeb4b4ff")
  ) +
  scale_color_manual(
    values = c("dodgerblue3",  "#e54c4cff", "#aa2a2aff","#e88989ff")
  ) +
  geom_beeswarm(
    size = 3, cex = 3,
    color = "black", method = "center",
    aes(fill = colour), shape = 21
  ) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(
    label.x = 1.5,
    size = 7,
    label = "p.format",
    comparisons = list(c(1, 2), c(2, 3))
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend()


In [None]:
ggsave("../figures/treg_validation/trajectory_max_density.svg", width = 7.5, height = 9.5, units = "cm")

In [None]:
treg_sub  <- subset(treg_no_integrated, Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20"))

In [None]:
# Convert the subsetted Seurat object and copy its PCA and UMAP cell
# embeddings into the reducedDims of the SingleCellExperiment.
sce_filtered <- as.SingleCellExperiment(treg_sub)

for (red_name in c("pca", "umap")) {
  reducedDim(sce_filtered, toupper(red_name), withDimnames = TRUE) <-
    treg_sub[[red_name]]@cell.embeddings
}

In [None]:
sce_filtered <- slingshot(sce_filtered, clusterLabels = 'seurat_clusters', reducedDim = 'UMAP')

In [None]:
sce_filtered

In [None]:
pseudotime_test <- slingPseudotime(sce_filtered)

In [None]:
pseudotime_test

In [None]:
# Decile table of slingshot pseudotime (lineages 1-5) and clone abundance.
# Fixes a syntax error: the comma after the first data.frame() argument was
# missing, so the cell could not be parsed.
# NOTE(review): `gut_cdn` is not created anywhere in the visible part of this
# notebook and must have one value per cell of `sce` - confirm its origin.
df_p <- data.frame(
  sling1 = sce$slingPseudotime_1,
  sling1_ntile = ntile(sce$slingPseudotime_1, n = 10),
  sling2_ntile = ntile(sce$slingPseudotime_2, n = 10),
  sling3_ntile = ntile(sce$slingPseudotime_3, n = 10),
  sling4_ntile = ntile(sce$slingPseudotime_4, n = 10),
  sling5_ntile = ntile(sce$slingPseudotime_5, n = 10),
  clone_ntile = ntile(gut_cdn$clone_abundance, n = 10)
)

# Share of cells in every clone-abundance decile, computed within each
# pseudotime decile of lineage 1 (grouping by `sling1_ntile` is kept after
# summarise, so `sum(n)` is the decile total).
freq_by_decile <- df_p %>%
  dplyr::filter(!is.na(sling1_ntile) & !is.na(clone_ntile)) %>%
  group_by(sling1_ntile, clone_ntile) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

freq_by_decile

freq_by_decile %>%
  ggplot(aes(x = sling1_ntile, y = clone_ntile)) +
  geom_point(aes(size = freq * 100, colour = freq * 100)) +
  theme_bw() +
  scale_size_continuous(range = c(7, 12)) +
  geom_text(aes(label = round(freq * 100, digits = 1))) +
  scale_colour_gradient2(
    low = "lightskyblue", mid = "lightsteelblue2", high = "salmon",
    midpoint = 2
  )

In [None]:
# Imbalance score of the conditions on the 2D embedding stored in `df`.
# `select` is namespace-qualified like everywhere else in this notebook, so
# a `select` from another attached package cannot be picked up by accident.
# NOTE(review): other cells read `df$Condition` (capital C); confirm that
# `df$conditions` exists, otherwise NULL is passed as `conditions`.
scores <- imbalance_score(
  Object = df %>% dplyr::select(Dim1, Dim2) %>% as.matrix(),
  conditions = df$conditions
)
df$scores <- scores$scores
df$scaled_scores <- scores$scaled_scores

In [None]:
# Pseudotime (lineages 1-5) and clone abundance per cell, both as raw values
# and as deciles.
# NOTE(review): `gut_cdn` is not created in the visible part of this notebook.
df_p <- data.frame(
  sling1 = sce$slingPseudotime_1,
  sling2 = sce$slingPseudotime_2,
  sling3 = sce$slingPseudotime_3,
  sling4 = sce$slingPseudotime_4,
  sling5 = sce$slingPseudotime_5,
  clone_ab = gut_cdn$clone_abundance,
  log_clone_ab = gut_cdn$log_clone_abundance,
  sling1_ntile = ntile(sce$slingPseudotime_1, n = 10),
  sling2_ntile = ntile(sce$slingPseudotime_2, n = 10),
  sling3_ntile = ntile(sce$slingPseudotime_3, n = 10),
  sling4_ntile = ntile(sce$slingPseudotime_4, n = 10),
  sling5_ntile = ntile(sce$slingPseudotime_5, n = 10),
  clone_ntile = ntile(gut_cdn$clone_abundance, n = 10)
)

# Share of cells per clone-abundance decile within each lineage-1 decile.
decile_freq <- df_p %>%
  dplyr::filter(!is.na(sling1_ntile) & !is.na(clone_ntile)) %>%
  group_by(sling1_ntile, clone_ntile) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

decile_freq

decile_freq %>%
  ggplot(aes(x = sling1_ntile, y = clone_ntile)) +
  geom_point(aes(size = freq * 100, colour = freq * 100)) +
  theme_bw() +
  scale_size_continuous(range = c(7, 12)) +
  geom_text(aes(label = round(freq * 100, digits = 1))) +
  scale_colour_gradient2(
    low = "lightskyblue", mid = "lightsteelblue2", high = "salmon",
    midpoint = 2
  )

# Gene correlation

In [None]:
# C-peptide metadata, one row per patient/time point; only rows whose
# `Patient_Time` label starts with "1" are kept.
md_cpept <- read_xlsx("../data/metadata_v07.xlsx") %>%
  mutate(Patient_Time = paste(patient, time_taken)) %>%
  dplyr::select(
    Patient_Time, fasting_cpept_T1, fasting_cpept_1, c_peptide_change
  ) %>%
  unique() %>%
  mutate(group = substr(Patient_Time, 1, 1)) %>%
  dplyr::filter(group == 1)

In [None]:
md_cpept$Patient_Time  <- paste0("g", md_cpept$Patient_Time)

In [None]:
treg

In [None]:
ds_mtx  <- AverageExpression(treg, group.by = "Patient_Time", return.seurat = F, assays = "RNA",
                            features = c("IL10RA","IL4R"))

In [None]:
ds_mtx$RNA

In [None]:
# Long table of average expression per gene and patient/time point, with the
# C-peptide metadata attached. The join key is stated explicitly so that a
# future shared column cannot silently change the join.
rna <- ds_mtx$RNA %>%
  as.data.frame() %>%
  rownames_to_column("gene") %>%
  pivot_longer(!gene, names_to = "Patient_Time", values_to = "expression") %>%
  left_join(md_cpept, by = "Patient_Time")

In [None]:
rna

In [None]:
#' Correlate the average expression of one gene with fasting C-peptide
#'
#' Reads the global table `rna` (columns `gene`, `expression`,
#' `fasting_cpept_1`) and runs `cor.test()` with its default settings.
#'
#' @param select_gene Gene symbol to look up in `rna$gene`.
#' @return A one-row data.frame with columns `gene`, `cor` (estimate),
#'   `pval` and `padj` (p value multiplied by 2 and capped at 1).
# NOTE(review): the scatter plots below use `fasting_cpept_T1`, whereas this
# function tests against `fasting_cpept_1` - confirm which column is meant.
calc_one_pop <- function(select_gene) {
  gene_df <- rna %>% dplyr::filter(gene == select_gene)
  if (nrow(gene_df) == 0) {
    stop("Gene '", select_gene, "' not found in `rna`.", call. = FALSE)
  }

  # Named `test_res` so that the local result does not shadow stats::cor().
  test_res <- cor.test(gene_df$expression, gene_df$fasting_cpept_1)

  data.frame(
    gene = select_gene,
    cor = test_res$estimate,
    pval = test_res$p.value,
    # Adjustment for two tested genes, capped at 1.
    padj = min(1, test_res$p.value * 2)
  )
}


In [None]:
calc_one_pop("IL10RA")

In [None]:
# Scatter plot of IL10RA expression against fasting C-peptide (T1) with a
# linear fit and the correlation statistics.
df <- rna %>%
  dplyr::filter(gene == "IL10RA")

p <- ggplot(df, aes(x = expression, y = fasting_cpept_T1)) +
  geom_point(shape = 16, size = 2) +
  geom_smooth(method = lm, alpha = 0.2) +
  ggtitle(paste("IL10RA", "in", "Treg")) +
  stat_cor(size = 7) +
  theme_classic() +
  ggtheme()
print(p)

#ggsave(filename = paste0("../figures/correlation_populations/IL10RinTreg.svg"), width = 9, height = 4)


In [None]:
# Scatter plot of IL4R expression against fasting C-peptide (T1) with a
# linear fit and the correlation statistics.
df <- rna %>%
  dplyr::filter(gene == "IL4R")

p <- ggplot(df, aes(x = expression, y = fasting_cpept_T1)) +
  geom_point(shape = 16, size = 2) +
  geom_smooth(method = lm, alpha = 0.2) +
  ggtitle(paste("IL4R", "in", "Treg")) +
  stat_cor(size = 7) +
  theme_classic() +
  ggtheme()
print(p)

#ggsave(filename = paste0("../figures/correlation_populations/IL4RRinTreg.svg"), width = 4.5, height = 4)


# GSEA

## GSEA - Zemmour et al. IPEX

In [None]:
fc.df  <- read_csv("../../240218_VN_Diabetes_V05/tables/fold_change/cd4_l3_treg.csv")  %>% as.data.frame()


In [None]:
fc.df

In [None]:
zemmour  <- read_xlsx("../../240218_VN_Diabetes_V05/data/published_data/Zemmour_2018/zemmour2.xlsx")

The Zemmour et al. table compares IPEX patients with healthy donors (IPEX vs. healthy).

In [None]:
fc.df  %>% dplyr::filter(cluster == "Dia")  %>% left_join(zemmour) 

In [None]:
ipex_in_our  <- fc.df  %>% dplyr::filter(cluster == "Dia")  %>% left_join(zemmour)  %>% 
dplyr::filter(!is.na(Average_FC_in_Treg))

In [None]:
ipex_in_our

In [None]:
treg_ipex_up  <- ipex_in_our  %>% dplyr::filter(Average_FC_in_Treg>1)  %>% pull(gene)

In [None]:
treg_ipex_down  <- ipex_in_our  %>% dplyr::filter(Average_FC_in_Treg<1)  %>% pull(gene)

In [None]:
# Ranked gene list for GSEA: log2 fold changes of the "Dia" cluster from the
# Treg fold-change table, sorted from highest to lowest and turned into a
# named vector (names = genes).
fc.df <- read_csv("../../240218_VN_Diabetes_V05/tables/fold_change/cd4_l3_treg.csv") %>%
  as.data.frame() %>%
  dplyr::filter(cluster == "Dia") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)

ranks <- deframe(fc.df)

In [None]:
library(fgsea)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)

# Enrichment curves of the two Zemmour gene sets, placed side by side.
enrich_up <- plotEnrichment(treg_ipex_up, ranks) +
  labs(title="treg_ipex_up") +
  ylim(-0.7, 0.7) +
  ggtheme()

enrich_down <- plotEnrichment(treg_ipex_down, ranks) +
  labs(title="treg_ipex_down") +
  ylim(-0.7, 0.7) +
  ggtheme()

p <- enrich_up + enrich_down


In [None]:
p

In [None]:
# Create the output folder for the GSEA figures. `recursive = TRUE` also
# creates a missing "../figures" parent, and `showWarnings = FALSE` keeps
# re-runs quiet when the folder already exists.
dir.create("../figures/gsea/", recursive = TRUE, showWarnings = FALSE)

In [None]:
ggsave(p, filename = paste0("../figures/gsea/treg_zemmour.svg"), width = 24, height = 12, units = "cm")

In [None]:
fgseaRes <- fgsea(pathways = list(treg_ipex_up = treg_ipex_up, treg_ipex_down = treg_ipex_down), 
                 stats = ranks)

In [None]:
fgseaRes

In [None]:
ipex_in_our

In [None]:
# Replace both fold-change columns by their dense rank in descending order
# (rank 1 = highest fold change). `across()` replaces the deprecated
# `mutate_each(funs(...))` interface.
ipex_in_our_rank <- ipex_in_our %>%
  dplyr::select(avg_log2FC, Average_FC_in_Treg) %>%
  mutate(across(everything(), ~ dense_rank(-.x)))

In [None]:
ipex_in_our_rank$gene  <- ipex_in_our$gene

In [None]:
ipex_in_our_rank

In [None]:
ipex_in_our_rank %>% mutate(decile_ipex = ntile(Average_FC_in_Treg, 10),
                            dec_our = ntile(avg_log2FC, 10)
                            )  %>% group_by(decile_ipex, dec_our)  %>% tally

In [None]:
ipex_in_our_rank

In [None]:

options(repr.plot.width = 6, repr.plot.height = 5)

# Bubble plot of the number of genes in every combination of rank octiles
# (8 bins) in the Zemmour data and in our data; both axes are reversed and
# the coordinates flipped.
ipex_in_our_rank %>%
  mutate(
    decile_ipex = ntile(Average_FC_in_Treg, 8),
    dec_our = ntile(avg_log2FC, 8)
  ) %>%
  group_by(decile_ipex, dec_our) %>%
  tally() %>%
  ggplot(aes(decile_ipex, dec_our)) +
  geom_point(aes(size = n, colour = n)) +
  theme_bw() +
  scale_size_continuous(range = c(7, 12)) +
  scale_y_reverse() +
  scale_x_reverse() +
  coord_flip() +
  geom_text(aes(label = n), size = 5) +
  scale_colour_gradient2(
    low = "lightskyblue", mid = "lightsteelblue2", high = "#B07AA1",
    midpoint = 2
  ) +
  theme_bw() +
  ggtheme()

ggsave(
  "../figures/gsea/zemmour_treg_genes_corr.svg",
  width = 13, height = 11, units = "cm"
)

In [None]:
ipex_in_our_rank %>% mutate(decile_ipex = ntile(Average_FC_in_Treg, 8),
                            decile_our = ntile(avg_log2FC, 8)
                            )  %>% dplyr::filter(decile_ipex == 8 & decile_our == 8)  %>% pull(gene)

## GSEA - Borna et al. IPEX


In [None]:
borna  <- readRDS("../../240218_VN_Diabetes_V05/data/published_data/Borna_2023/GSE247274_unstim.rds")

In [None]:
DimPlot(borna)

In [None]:
borna$Gated_and_FOXP3  %>% table

In [None]:
borna$Gated_and_FOXP3  %>% table

In [None]:
borna$cluster_names_mem  %>% table

In [None]:
DimPlot(borna, group.by = "status")

In [None]:
DimPlot(borna, group.by = "Gated_and_FOXP3")

### Cells clustering as Tregs

In [None]:
borna_treg  <- subset(borna, cluster_names_mem %in% c("Treg memory 2", "Treg memory 1", "Treg naive"))

In [None]:
Idents(borna_treg)  <- borna_treg$status
    mrk  <- FindAllMarkers(borna_treg)

In [None]:
borna_hd_genes  <- mrk  %>% dplyr::filter(avg_log2FC>0 & cluster == "HD")  %>% pull(gene)

In [None]:
borna_hd_genes  %>% length

In [None]:
borna_ipex_genes  <- mrk  %>% dplyr::filter(avg_log2FC>0 & cluster == "IPEX")  %>% pull(gene) 

In [None]:
borna_ipex_genes  %>% length

In [None]:
# Ranked gene list for GSEA: "Dia" log2 fold changes from the Treg table,
# sorted from highest to lowest, as a named vector (names = genes).
fc.df <- read_csv("../../240218_VN_Diabetes_V05/tables/fold_change/cd4_l3_treg.csv") %>%
  as.data.frame() %>%
  dplyr::filter(cluster == "Dia") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)

ranks <- deframe(fc.df)


In [None]:
fgsea_bulk <- fgsea(pathways = list(Borna_UP_in_Ctrl = borna_hd_genes,
                                    Borna_UP_in_IPEX = borna_ipex_genes
                                    ), 
                                      stats    = ranks)
fgsea_bulk

In [None]:
fgsea_bulk$leadingEdge

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)

# Enrichment curves of the Borna HD and IPEX marker sets (cells clustering
# as Tregs), placed side by side.
enrich_hd <- plotEnrichment(borna_hd_genes, ranks) +
  labs(title="borna_hd_genes") +
  ylim(-0.7, 0.7) +
  ggtheme()

enrich_ipex <- plotEnrichment(borna_ipex_genes, ranks) +
  labs(title="borna_ipex_genes") +
  ylim(-0.7, 0.7) +
  ggtheme()

p <- enrich_hd + enrich_ipex

p

# Create the output folder if needed; quiet when it already exists.
dir.create("../figures/gsea/", recursive = TRUE, showWarnings = FALSE)

# NOTE(review): the "Only cells sorted as Tregs" section below saves to the
# same file name and therefore overwrites this figure - confirm which
# version is meant to be kept.
ggsave(p, filename = paste0("../figures/gsea/treg_borna.svg"), width = 24, height = 12, units = "cm")

### Only cells sorted as Tregs

In [None]:
options(repr.plot.width = 10, repr.plot.height = 6)

DimPlot(borna, group.by = "Gated_and_FOXP3")

In [None]:
options(repr.plot.width = 16, repr.plot.height = 12)

DimPlot(borna, group.by = "cluster_names_mem", split.by = "Gated_and_FOXP3", ncol = 3)

In [None]:
borna_treg  <- subset(borna, Gated_and_FOXP3 %in% c("FOXP3_plus_Treg", "FOXP3_minus_Treg"))

In [None]:
Idents(borna_treg)  <- borna_treg$status
    mrk  <- FindAllMarkers(borna_treg)

In [None]:
borna_hd_genes  <- mrk  %>% dplyr::filter(avg_log2FC>0 & cluster == "HD")  %>% pull(gene)

In [None]:
borna_ipex_genes  <- mrk  %>% dplyr::filter(avg_log2FC>0 & cluster == "IPEX")  %>% pull(gene)

In [None]:
# Ranked gene list for GSEA: "Dia" log2 fold changes from the Treg table,
# sorted from highest to lowest, as a named vector (names = genes).
fc.df <- read_csv("../../240218_VN_Diabetes_V05/tables/fold_change/cd4_l3_treg.csv") %>%
  as.data.frame() %>%
  dplyr::filter(cluster == "Dia") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)

ranks <- deframe(fc.df)


In [None]:
fgsea_bulk <- fgsea(pathways = list(Borna_UP_in_Ctrl = borna_hd_genes,
                                    Borna_UP_in_IPEX = borna_ipex_genes
                                    ), 
                                      stats    = ranks)
fgsea_bulk

In [None]:
fgsea_bulk$leadingEdge

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)

# Enrichment curves of the Borna HD and IPEX marker sets (cells sorted as
# Tregs), placed side by side.
enrich_hd <- plotEnrichment(borna_hd_genes, ranks) +
  labs(title="borna_hd_genes") +
  ylim(-0.7, 0.7) +
  ggtheme()

enrich_ipex <- plotEnrichment(borna_ipex_genes, ranks) +
  labs(title="borna_ipex_genes") +
  ylim(-0.7, 0.7) +
  ggtheme()

p <- enrich_hd + enrich_ipex

p

In [None]:
# Save the enrichment plot of the sorted-Treg comparison.
# NOTE(review): the "Cells clustering as Tregs" section writes to this same
# path, so that earlier figure is overwritten here - confirm this is intended.
ggsave(p, filename = paste0("../figures/gsea/treg_borna.svg"), width = 24, height = 12, units = "cm")

# Analysis of Treg cells from HPAP

In [None]:
treg_hpap_spleen  <- readRDS("../data/published_data/HPAP/240623_treg_hpap_spleen_stacas_v06.rds")

In [None]:
DimPlot(treg_hpap_spleen)

### Density

In [None]:
# Per-cell table with patient, disease status and the two UMAP coordinates,
# used for the density plots below.
density <- treg_hpap_spleen@meta.data %>%
  dplyr::select(Patient_ID, Disease) %>%
  mutate(
    x_umap = treg_hpap_spleen@reductions$umap@cell.embeddings[, 1],
    y_umap = treg_hpap_spleen@reductions$umap@cell.embeddings[, 2]
  )

library(ggplot2)
library(dplyr)
library(viridis)

# Colour ramp for the filled density bins (white through grey/blue to red).
colfunc <- colorRampPalette(
  c("white", "grey85","grey75","dodgerblue","green","yellow","red")
)


In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)

# Filled 2D density (30 bins) of the UMAP coordinates of one disease group.
# `disease_label` is matched against `density$Disease` and used as the title.
plot_umap_density <- function(disease_label) {
  density %>%
    filter(Disease == disease_label) %>%
    ggplot(aes(x = x_umap, y = y_umap)) +
    geom_point(shape = "") +
    geom_density_2d_filled(bins = 30) +
    theme_classic() +
    ggtheme() +
    xlim(-10, 10) +
    ylim(-5, 5) +
    scale_fill_manual(values = colfunc(30)) +
    ggtitle(disease_label) +
    NoLegend()
}

p1 <- plot_umap_density("T1DM")
p2 <- plot_umap_density("Ctrl")

p1 + p2

In [None]:
DefaultAssay(treg_hpap_spleen)  <- 'integrated'

In [None]:
treg_hpap_spleen <- FindClusters(treg_hpap_spleen, resolution = 0.1)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4.5)
DimPlot(treg_hpap_spleen, group.by = "seurat_clusters") + ggtitle("")

In [None]:
library(ragg)

In [None]:
# UMAP coloured by cluster without axes, title or legend; the displayed plot
# is then saved as PNG and SVG.
DimPlot(treg_hpap_spleen, group.by = "seurat_clusters") +
  ggtitle("") +
  theme(
    plot.title = element_text(hjust = 0.5, face = "italic"),
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.line = element_blank()
  ) +
  NoLegend()

ggsave(
  "../figures/treg_hpap_dimplot.png",
  width = 9, height = 7, units = "cm"
)
ggsave(
  "../figures/treg_hpap_dimplot.svg",
  width = 9, height = 7, units = "cm"
)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 5)

# Sample label = patient + tissue. (A previous assignment from `source` was
# dead code: it was overwritten on the very next line.)
seurat_meta_data <- treg_hpap_spleen@meta.data
seurat_meta_data$sample <- paste(seurat_meta_data$Patient_ID, seurat_meta_data$Tissue)

# Frequency of every cluster within each sample. After summarise() the table
# stays grouped by `sample`, so `sum(n)` is the sample total. The counts are
# already unique per sample/cluster, so no unique() is needed.
# NOTE(review): sample/cluster combinations without any cell get no row
# (rather than a frequency of 0) - confirm this is intended for the plots.
df4 <- seurat_meta_data %>%
  group_by(sample, seurat_clusters) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# The non-grouping variables were lost in summarise(); join them back.
md_to_join <- seurat_meta_data %>%
  dplyr::select(sample, Disease, Patient_ID, Tissue) %>%
  unique()
df4 <- left_join(df4, md_to_join, by = "sample")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4)

# Sample label = patient + tissue. (A previous assignment from `source` was
# dead code: it was overwritten on the very next line.)
seurat_meta_data <- treg_hpap_spleen@meta.data
seurat_meta_data$sample <- paste(seurat_meta_data$Patient_ID, seurat_meta_data$Tissue)

# Frequency of every cluster within each sample (grouping by `sample` is kept
# after summarise(), so `sum(n)` is the sample total).
df4 <- seurat_meta_data %>%
  group_by(sample, seurat_clusters) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# The non-grouping variables were lost in summarise(); join them back.
md_to_join <- seurat_meta_data %>%
  dplyr::select(sample, Disease, Patient_ID, Tissue) %>%
  unique()
df4 <- left_join(df4, md_to_join, by = "sample")

# Violin + beeswarm of cluster frequencies per disease group, one panel per
# cluster. Only one fill and one colour scale are added: the first pair of
# manual scales was replaced by the second one anyway (ggplot2 keeps the
# scale added last), so the rendered colours are unchanged.
df4 %>%
  ggplot(aes(x = Disease, y = freq * 100)) + # change x to any variable of interest
  geom_violin(alpha = 0.3, aes(fill = Disease)) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  geom_beeswarm(
    size = 3, cex = 3,
    color = "black", method = "center",
    aes(fill = Disease), shape = 21
  ) +
  facet_wrap(~seurat_clusters, scales = "free") +
  scale_color_manual(values = c("dodgerblue","indianred2")) +
  scale_fill_manual(values = c("dodgerblue","indianred2")) +
  ylab("Frequency") +
  xlab("Condition") +
  ggpubr::stat_compare_means(label = "p.format", size = 5) +
  ylim(c(0, NA)) + # y axis starts at zero; remove this line if not wanted
  theme_classic() +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  NoLegend() +
  ggtheme()


In [None]:
ggsave("../figures/subset_characterization/treg_HPAP_violin.svg",
       width = 14, height = 8, units = "cm")

In [None]:
options(repr.plot.width = 8, repr.plot.height = 5)

# Sample label = patient + tissue. (A previous assignment from `source` was
# dead code: it was overwritten on the very next line.)
seurat_meta_data <- treg_hpap_spleen@meta.data
seurat_meta_data$sample <- paste(seurat_meta_data$Patient_ID, seurat_meta_data$Tissue)

# Frequency of every `seurat_clusters2` group within each sample (grouping by
# `sample` is kept after summarise(), so `sum(n)` is the sample total).
# NOTE(review): `seurat_clusters2` is not created in the visible part of this
# notebook; presumably it is already stored in the loaded object - confirm.
df4 <- seurat_meta_data %>%
  group_by(sample, seurat_clusters2) %>%
  summarise(n = n(), .groups = "drop_last") %>%
  mutate(freq = n / sum(n))

# The non-grouping variables were lost in summarise(); join them back.
md_to_join <- seurat_meta_data %>%
  dplyr::select(sample, Disease, Patient_ID, Tissue) %>%
  unique()
df4 <- left_join(df4, md_to_join, by = "sample")

# Boxplot + jittered points of the frequencies per disease group, one panel
# per cluster.
df4 %>%
  ggplot(aes(x = Disease, y = freq * 100)) + # change x to any variable of interest
  geom_boxplot(outlier.shape = NA, alpha = 0.7, aes(fill = Disease)) +
  geom_dotplot(binaxis='y', stackdir='center', dotsize=0) +
  geom_jitter(
    position = position_jitter(0.05), size = 3,
    aes(fill = Disease), shape = 21
  ) + # in aes, shape or fill can be used as well (for shapes that allow it)
  facet_wrap(~seurat_clusters2, scales = "free", ncol = 4) +
  scale_color_manual(values = c("dodgerblue","indianred2")) +
  scale_fill_manual(values = c("dodgerblue","indianred2")) +
  ylab("Frequency") +
  xlab("Condition") +
  ggpubr::stat_compare_means(label = "p.format", size = 5) +
  ylim(c(0, NA)) + # y axis starts at zero; remove this line if not wanted
  theme_classic() +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  NoLegend() +
  ggtheme()


In [None]:

# Cluster frequencies with every non-"Ctrl" disease label collapsed to
# "Dia"; boxplot + beeswarm per cluster, then saved to the V06 figure folder.
df4 %>%
  mutate(Disease = ifelse(Disease == "Ctrl", "Ctrl","Dia")) %>%
  ggplot(aes(x = Disease, y = freq * 100)) +
  geom_boxplot(outlier.shape = NA, alpha = 0.3, aes(fill = Disease)) +
  geom_dotplot(binaxis='y', stackdir='center', dotsize=0) +
  geom_beeswarm(
    size = 3, cex = 3,
    color = "black", method = "center",
    aes(fill = Disease), shape = 21
  ) +
  facet_wrap(~seurat_clusters2, scales = "free", ncol = 5) +
  scale_fill_manual(
    values = c("#1874cdff",  "#ee6363ff", "#c41515ff","#eeb4b4ff")
  ) +
  scale_color_manual(
    values = c("dodgerblue3",  "#e54c4cff", "#aa2a2aff","#e88989ff")
  ) +
  ylab("Frequency") +
  xlab("Condition") +
  # ggpubr::stat_compare_means() +
  ylim(c(0, NA)) + # y axis starts at zero
  theme_classic() +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  ggtheme()

ggsave(
  "../../240617_VN_Diabetes_V06/figures/treg_validation/hpap_treg_subsets_in_condition.svg",
  width = 16, height = 8, units = "cm"
)

In [None]:
VlnPlot(treg_hpap_spleen, features = c("IL2RA"), cols = c("dodgerblue","indianred2")) + 
ggpubr::stat_compare_means()

In [None]:
VlnPlot(treg_hpap_spleen, features = c("CD226"), cols = c("dodgerblue","indianred2")) + 
ggpubr::stat_compare_means(method = "t.test")

In [None]:
VlnPlot(treg_hpap_spleen, features = c("IL2RA","CD226"), cols = c("dodgerblue","indianred2"))

In [None]:
ggsave("../figures/subset_characterization/treg_hpap_il2ra_cd226.svg",
      width = 20, height = 12, units = "cm")

ggsave("../figures/subset_characterization/treg_hpap_il2ra_cd226.png",
      width = 20, height = 12, units = "cm")

In [None]:
VlnPlot(treg_hpap_spleen, features = c("IL4R","IL10RA"), cols = c("dodgerblue","indianred2"))

In [None]:
treg_hpap_spleen$Sample_ID  <- as.character(treg_hpap_spleen$Patient_ID)

In [None]:
# Extract the dataframe with seurat metadata
seurat_meta_data <- treg_hpap_spleen@meta.data
seurat_meta_data$sample  <- seurat_meta_data$Sample_ID

# Select genes of interest
gene_hits <- c("IL2RA","FOXP3","GZMK","TNFRSF9","CD226","IL4R","IL10RA","SELL","CCR7","TIGIT")

# Result table: one row per gene, one column per sample is added in the loop
expr_data4 <- data.frame(genes = gene_hits)

# Converts any non-zero count to 1. No longer needed by the loop below (the
# sparse comparison `counts > 0` does the same job) but kept because the
# name is defined at top level and may be used elsewhere.
fns_replace <- function(x){ifelse(x>0,1,0)}

for (j in levels(factor(seurat_meta_data$sample))) {

  # cells of the current sample only
  seu_sub_sample <- subset(treg_hpap_spleen, Sample_ID == j)

  # counts of the genes of interest; `drop = FALSE` keeps a matrix even when
  # only one gene or one cell is left
  counts_sub <- seu_sub_sample@assays$RNA@counts
  counts_sub <- counts_sub[rownames(counts_sub) %in% gene_hits, , drop = FALSE]

  # fraction of cells with a non-zero count, computed without converting the
  # counts to a dense data frame
  pct_expr <- Matrix::rowMeans(counts_sub > 0)

  # genes that are absent from the count matrix are reported as 0
  pct_expr[setdiff(gene_hits, names(pct_expr))] <- 0

  expr_data3 <- data.frame(
    pct = unname(pct_expr),
    genes = names(pct_expr),
    stringsAsFactors = FALSE
  )
  colnames(expr_data3)[1] <- j

  # add the sample as a new column, rows stay in the order of `gene_hits`
  expr_data4 <- left_join(expr_data4, expr_data3, by = "genes")
}

In [None]:
expr_data4

In [None]:
# Long table with one row per sample and gene. The table is reshaped with
# pivot_longer() directly; the former transpose via t() turned every number
# into a character string first, which only keeps about 15 significant
# digits. `pct_express` is therefore numeric now (a later as.numeric() on it
# is harmless). Rows are ordered by sample (in column order of `expr_data4`)
# and, within a sample, by gene as before.
expr_data5 <- expr_data4 %>%
  pivot_longer(!genes, names_to = "sample", values_to = "pct_express") %>%
  dplyr::select(sample, gene = genes, pct_express) %>%
  arrange(match(sample, colnames(expr_data4)))
expr_data5$Sample_ID <- expr_data5$sample

In [None]:
# Add metadata per sample - select those that you will use in the plot below
md_to_join <- seurat_meta_data %>% dplyr::select(Sample_ID, Disease) %>% 
    ungroup %>% unique
expr_data5$Sample_ID  <- as.character(expr_data5$Sample_ID)
md2 <- left_join(expr_data5, md_to_join, by = "Sample_ID") %>% ungroup %>% unique

In [None]:
options(repr.plot.width = 10, repr.plot.height = 12)

# Share of expressing cells per sample, compared between disease groups,
# one panel per gene.
ggplot(md2, aes(x = Disease, y = as.numeric(pct_express))) +
  geom_boxplot(outlier.shape = NA) +
  geom_dotplot(binaxis='y', stackdir='center', dotsize=0) +
  geom_jitter(
    position = position_jitter(0.2),
    size = 2,
    aes(color = factor(Disease))
  ) +
  theme_minimal() +
  facet_wrap(~gene, scales = "free", ncol = 5) +
  ylim(c(0, NA)) +
  ylab("Percentage of expressing cells") +
  theme(legend.title = element_blank()) +
  stat_compare_means(label = "p.format")

### Marker heatmap

In [None]:
genes_treg  <-  c("CCR7","TCF7","LEF1","SELL","BTG1","IL7R","CD226","LAG3",
                  "GZMK","HAVCR2","ICOS",
    "CTLA4","TIGIT","IL10RA","IL2RB","IL2RA","FOXP3","IKZF2","TNFRSF18",
                  "ENTPD1",
                  "TNFRSF9","HLA-DPA1")

In [None]:
avgexp = AverageExpression(treg_hpap_spleen, features = genes_treg, return.seurat = F, 
                           group.by = "seurat_clusters", assays = "RNA")

In [None]:
options(repr.plot.width = 9, repr.plot.height = 3.2)
pheatmap(t(avgexp$RNA), main = "", scale = "column", cluster_cols = T, cluster_rows = T,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 14)

In [None]:

    options(repr.plot.width = 9, repr.plot.height = 3.2)
pheatmap(t(avgexp$RNA), main = "", scale = "column", cluster_cols = T, cluster_rows = T,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 14, 
        filename = "../figures/treg_heatmap.pdf")

In [None]:
genes_treg  <-  c("TCF7","LEF1","NELL2","IL7R","IL4R","GZMK","CD226",
    "CCR4","CTLA4","TNFRSF9","FOXP3","IL2RA")

In [None]:
avgexp = AverageExpression(treg_hpap_spleen, features = genes_treg, return.seurat = F, 
                           group.by = "seurat_clusters2", assays = "RNA")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
pheatmap(t(avgexp$RNA[match(c("TCF7","LEF1","NELL2","IL7R","IL4R","GZMK","CD226",
    "CCR4","CTLA4","TNFRSF9","FOXP3","IL2RA"),
rownames(avgexp$RNA)),]), main = "", scale = "column", cluster_cols = F, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 9)

In [None]:
pheatmap(t(avgexp$RNA[match(c("TCF7","LEF1","NELL2","IL7R","IL4R","GZMK","CD226",
    "CCR4","CTLA4","TNFRSF9","FOXP3","IL2RA"),
rownames(avgexp$RNA)),]), main = "", scale = "column", cluster_cols = F, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 5, height = 3.3,
                  fontsize = 9, filename = "../../240617_VN_Diabetes_V06/figures/treg_validation/hpap_heatmap.pdf")

# FOXP3 deficient Treg cells

## Analysis of Treg cells from Schumann et al., 2020

In [None]:
library(data.table)

In [None]:
schumann  <- fread("../data/published_data/Schumann_2020/exprs_mat_raw.csv.gz")  %>% as.data.frame()

In [None]:
schumann[10000:10009,1:5]

In [None]:
rownames(schumann)  <- schumann$V1

In [None]:
schumann$V1  <- NULL

In [None]:
rownames(schumann[10000:10009,1:5])

In [None]:
schumann_seu <- CreateSeuratObject(schumann, min.cells = 3, min.features = 200)
  schumann_seu[["percent.mt"]] <- PercentageFeatureSet(schumann_seu, pattern = "^MT-")
  schumann_seu[["percent.rt"]] <- PercentageFeatureSet(schumann_seu, pattern = "^RP[LS]")

In [None]:
  # Remove TRAV, TRBV, mito and ribo genes (and names containing ".<digit>")
  seu_annots <- schumann_seu@meta.data

  # The mask is built from the row names of `schumann` itself. It used to be
  # built from `rownames(schumann_seu)`, which has fewer rows (genes were
  # already filtered by `min.cells`), so the mask did not line up with the
  # rows it was applied to and the wrong genes were dropped.
  # NOTE(review): the pattern is not anchored ("MT-" and "RP[LS]" match
  # anywhere in a name), unlike the "^MT-" / "^RP[LS]" patterns used for the
  # QC percentages - confirm this is intended.
  genes_to_drop <- grepl('TR[AB]V|MT-|RP[LS]|\\.\\d', rownames(schumann))
  seu_counts <- schumann[!genes_to_drop, ]

  seu <- CreateSeuratObject(counts = seu_counts,
                            project = "seu",
                            min.cells = 3,
                            min.features = 200,
                            meta.data = seu_annots)

In [None]:
plan("sequential")

In [None]:
  # Standard Seurat workflow on the filtered Schumann object: normalisation,
  # scaling, 1000 variable features, 20 PCs, UMAP, neighbours and clustering.
  # NOTE(review): ScaleData() runs before FindVariableFeatures() here;
  # confirm that this order is intended.
  seu <- seu %>%
    NormalizeData() %>%
    ScaleData(verbose = FALSE) %>%
    FindVariableFeatures(
      selection.method = "vst",
      nfeatures = 1000,
      verbose = FALSE
    ) %>%
    RunPCA(npcs = 20, verbose = FALSE) %>%
    RunUMAP(reduction = "pca", dims = 1:20) %>%
    FindNeighbors(dims = 1:20) %>%
    FindClusters(resolution = 0.9)

In [None]:
DimPlot(seu)

In [None]:
saveRDS(seu, "../data/published_data/Schumann_2020/schumann_treg.rds")

In [None]:
DimPlot(seu, group.by = "donor")

Add metadata.

In [None]:
md_schumann  <- read_csv("../data/published_data/Schumann_2020/mat_metadata.csv")

In [None]:
colnames(md_schumann)[1]  <- "barcode"

In [None]:
seu$barcode  <- colnames(seu)

In [None]:
md_schumann$barcode %in% seu$barcode  %>% table

In [None]:
md  <- seu@meta.data

In [None]:
md_schumann$orig.ident  <-  NULL
md_schumann$nCount_RNA <-  NULL
md_schumann$nFeature_RNA <-  NULL

In [None]:
md  <- left_join(md, md_schumann)

In [None]:
seu_backup  <- seu

In [None]:
seu@meta.data  <- md
rownames(seu@meta.data)  <- colnames(seu)

In [None]:
colnames(seu@meta.data)

In [None]:
DimPlot(seu, group.by = "ClusterNames_0.45")

In [None]:
DimPlot(seu, group.by = "KO")

In [None]:
DimPlot(seu, group.by = "donor")

In [None]:
DimPlot(seu, group.by = "stim")

In [None]:
FeaturePlot(seu, features = "CD4")
FeaturePlot(seu, features = "CD3D")
FeaturePlot(seu, features = "FOXP3")
FeaturePlot(seu, features = "KLRG1")
FeaturePlot(seu, features = "CD226")

In [None]:
FeaturePlot(seu, features = "TCF7")

In [None]:
FeaturePlot(seu, features = "GZMK")

In [None]:
  seu <- FindClusters(seu, resolution = 0.4)
DimPlot(seu, label = T)

In [None]:
mrk9  <- FindAllMarkers(seu, logfc.threshold = log(2))

In [None]:
FeaturePlot(seu, features = "USP18")

In [None]:
mrk9  %>% filter(cluster == 9)

In [None]:
seu$pop  %>% table

In [None]:
DimPlot(seu, label = T, group.by = "pop")

In [None]:
seu$KO  %>% table

In [None]:
# Differential expression of every knockout against the "ctrl" cells.
# Levels 1 and 3-11 of `KO` are used; level 2 is skipped.
# NOTE(review): the indices assume that `KO` has 11 levels with "ctrl" in
# second position - confirm against `table(seu$KO)`.
ko_levels <- levels(factor(seu$KO))
ko_index <- c(1, 3:11)

# Results are collected in a pre-allocated list and bound once at the end,
# instead of growing `df_all` with rbind() in every iteration (which also
# relied on the first index being 1 for its initialisation).
ko_marker_list <- vector("list", length(ko_index))

for (k in seq_along(ko_index)) {
  current_ko <- ko_levels[ko_index[k]]

  sub_seu <- subset(seu, KO %in% c("ctrl", current_ko))
  Idents(sub_seu) <- sub_seu$KO

  mrk <- FindAllMarkers(sub_seu)
  mrk$knockout <- current_ko

  ko_marker_list[[k]] <- mrk
}

df_all <- do.call(rbind, ko_marker_list)

In [None]:
write.csv(df_all, "../data/published_data/Schumann_2020/ko_DE_genes.csv")

In [None]:
plan("multisession", workers = 4)

### Fold changes for GSEA

In [None]:
options(future.globals.maxSize = 10000 * 1024^2)

In [None]:
# Fold changes of every knockout against the "ctrl" cells, without any
# filtering of genes (all thresholds disabled), for use in GSEA.
# Levels 1 and 3-11 of `KO` are used; level 2 is skipped.
# NOTE(review): the indices assume that `KO` has 11 levels with "ctrl" in
# second position - confirm against `table(seu$KO)`.
ko_levels <- levels(factor(seu$KO))
ko_index <- c(1, 3:11)

# Results are collected in a pre-allocated list and bound once at the end,
# instead of growing `df_all_all` with rbind() in every iteration (which
# also relied on the first index being 1 for its initialisation).
ko_fc_list <- vector("list", length(ko_index))

for (k in seq_along(ko_index)) {
  current_ko <- ko_levels[ko_index[k]]

  sub_seu <- subset(seu, KO %in% c("ctrl", current_ko))
  Idents(sub_seu) <- sub_seu$KO

  mrk <- FindAllMarkers(sub_seu, test.use = "wilcox",
                        logfc.threshold = -Inf, min.pct = -Inf, min.diff.pct = -Inf,
                        only.pos = T,
                        min.cells.feature = 1, return.thresh = 1)
  mrk$knockout <- current_ko

  ko_fc_list[[k]] <- mrk
}

df_all_all <- do.call(rbind, ko_fc_list)

In [None]:
# Save the stacked per-knockout fold-change table.
write.csv(df_all_all, "../data/published_data/Schumann_2020/ko_fold_changes.csv")

In [None]:
# Same limit as set earlier in this notebook; repeated here so this cell can be
# run on its own.
options(future.globals.maxSize = 10000 * 1024^2)

In [None]:
# Fold changes for FOXP3 knockout vs control cells in `seu_unstim`.
# Thresholds are set to -Inf / 1 so FindAllMarkers() does not filter genes by
# fold change, detection rate or p-value; only.pos = TRUE keeps, per identity,
# the rows with positive fold change. `TRUE` replaces the reassignable `T`.
sub_seu <- subset(seu_unstim, KO %in% c("ctrl", "foxp3"))
Idents(sub_seu) <- sub_seu$KO
fc_foxp3_ko <- FindAllMarkers(
  sub_seu,
  test.use = "wilcox",
  logfc.threshold = -Inf, min.pct = -Inf, min.diff.pct = -Inf,
  only.pos = TRUE,
  min.cells.feature = 1, return.thresh = 1
)

In [None]:
# Save the FOXP3 KO vs control fold-change table.
write.csv(fc_foxp3_ko, "../data/published_data/Schumann_2020/fc_foxp3_ko.csv")

# Borna 2023

In [None]:
# Load the Borna 2023 object (file GSE247274_unstim.rds).
borna  <- readRDS("../../240617_VN_Diabetes_V06//data/published_data/Borna_2023/GSE247274_unstim.rds")

In [None]:
# Keep only cells annotated as one of the three Treg populations in
# `cluster_names_mem`.
borna_treg  <- subset(borna, cluster_names_mem %in% c("Treg memory 2", "Treg memory 1", "Treg naive"))

In [None]:
# Use the `status` metadata column as identity and compute markers for each
# status group. Note that this overwrites the global `mrk`.
Idents(borna_treg)  <- borna_treg$status
    mrk  <- FindAllMarkers(borna_treg)

In [None]:
# Genes reported as markers of the "HD" status group with positive log2 fold
# change.
borna_hd_genes <- mrk %>%
  dplyr::filter(avg_log2FC > 0, cluster == "HD") %>%
  pull(gene)

In [None]:
# Genes reported as markers of the "IPEX" status group with positive log2 fold
# change.
borna_ipex_genes <- mrk %>%
  dplyr::filter(avg_log2FC > 0, cluster == "IPEX") %>%
  pull(gene)

In [None]:
# Build the ranked gene list for GSEA: rows of the Treg fold-change table for
# cluster "Dia", ordered by decreasing avg_log2FC, turned into a named vector
# (names = gene, values = avg_log2FC).
fc.df <- as.data.frame(
  read_csv("../../240617_VN_Diabetes_V06/tables/fold_change/cd4_l3_treg.csv")
)
fc.df <- fc.df %>%
  dplyr::filter(cluster == "Dia") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)
ranks <- deframe(fc.df)


In [None]:
# Two-column tables (gene, direction) for the HD and IPEX marker sets, so they
# can be stacked and exported together.
df1  <- data.frame(gene = borna_hd_genes,
           direction = "HD")

df2  <-   data.frame(gene = borna_ipex_genes,
                    direction = "IPEX")

In [None]:
# Export the combined HD + IPEX marker table.
write.csv(rbind(df1,df2),
         "../../240617_VN_Diabetes_V06/data/published_data/Borna_2023/borna_ipex_treg_markers.csv")

In [None]:
library(fgsea)

In [None]:
# Test enrichment of the two Borna marker sets along `ranks` and display the
# result table.
fgsea_bulk <- fgsea(pathways = list(Borna_UP_in_Ctrl = borna_hd_genes,
                                    Borna_UP_in_IPEX = borna_ipex_genes
                                    ), 
                                      stats    = ranks)
fgsea_bulk

In [None]:
# Enrichment curve of the HD marker set along `ranks`.
plotEnrichment(borna_hd_genes,
               ranks) + labs(title="Borna enriched in HD") 


In [None]:
# Enrichment curve of the IPEX marker set along `ranks`.
# The title previously read "Borna enriched in HD" (copied from the HD plot)
# although the gene set shown is `borna_ipex_genes`.
plotEnrichment(borna_ipex_genes, ranks) +
  labs(title = "Borna enriched in IPEX")

In [None]:
# Test enrichment of the two Borna marker sets along `ranks` and display the
# result table (same call as the earlier fgsea cell in this section).
fgsea_bulk <- fgsea(pathways = list(Borna_UP_in_Ctrl = borna_hd_genes,
                                    Borna_UP_in_IPEX = borna_ipex_genes
                                    ), 
                                      stats    = ranks)
fgsea_bulk

In [None]:
# Enrichment curve of the HD marker set along `ranks`; the figure is then
# written as PNG and SVG (ggsave() without `plot` saves the last plot).
plotEnrichment(borna_hd_genes, ranks) +
  labs(title = "Borna enriched in HD")

ggsave(
  filename = "../figures/treg/borna_ctrl.png",
  width = 12, height = 9, units = "cm"
)
ggsave(
  filename = "../figures/treg/borna_ctrl.svg",
  width = 12, height = 9, units = "cm"
)

In [None]:
# Enrichment curve of the IPEX marker set along `ranks`; the figure is then
# written as PNG and SVG (ggsave() without `plot` saves the last plot).
# The title previously read "Borna enriched in HD" (copied from the HD plot),
# so the saved borna_ipex figures carried the wrong label.
plotEnrichment(borna_ipex_genes, ranks) +
  labs(title = "Borna enriched in IPEX")

ggsave(
  filename = "../figures/treg/borna_ipex.png",
  width = 12, height = 9, units = "cm"
)
ggsave(
  filename = "../figures/treg/borna_ipex.svg",
  width = 12, height = 9, units = "cm"
)

# GSEA Treg from FOXP3- mice

In [None]:
### Treg KO signatures tested in the Treg fold-change ranking

library(fgsea)

# Ranked gene list from the Treg fold-change table (cd4_l3_treg.csv): rows for
# cluster "Dia", ordered by decreasing avg_log2FC, as a named vector
# (names = gene, values = avg_log2FC).
fc.df <- as.data.frame(read_csv("../tables/fold_change/cd4_l3_treg.csv"))
fc.df <- fc.df %>%
  dplyr::filter(cluster == "Dia") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)
ranks <- deframe(fc.df)


In [None]:
# Gene sets from the FOXP3 knockout comparison in `df_all`:
# "up" = positive fold change in the foxp3 identity,
# "down" = positive fold change in the ctrl identity.
treg_foxp3ko_up <- df_all %>%
  filter(knockout == "foxp3", cluster == "foxp3", avg_log2FC > 0) %>%
  pull(gene)
treg_foxp3ko_down <- df_all %>%
  filter(knockout == "foxp3", cluster == "ctrl", avg_log2FC > 0) %>%
  pull(gene)

In [None]:
# Display the genes with positive fold change in control vs FOXP3 KO.
treg_foxp3ko_down

In [None]:
# Enrichment curves of the two FOXP3 KO gene sets along `ranks`.
plotEnrichment(treg_foxp3ko_up,
               ranks) + labs(title="treg_foxp3ko_up") 

plotEnrichment(treg_foxp3ko_down,
               ranks) + labs(title="treg_foxp3ko_down") 


In [None]:
options(repr.plot.width = 14, repr.plot.height = 4)

# For every knockout in `df_all`: build the "up in KO" and "up in ctrl" gene
# sets, test both with fgsea along `ranks`, and show the two enrichment curves
# side by side with their p-values.
for (ko in levels(factor(df_all$knockout))) {
  treg_ko_up <- df_all %>%
    filter(knockout == ko & cluster == ko & avg_log2FC > 0) %>%
    pull(gene)
  treg_ko_down <- df_all %>%
    filter(knockout == ko & cluster == "ctrl" & avg_log2FC > 0) %>%
    pull(gene)

  fgsea_bulk <- fgsea(
    pathways = list(UP_in_KO = treg_ko_up, UP_in_Ctrl = treg_ko_down),
    stats = ranks
  )

  # Look p-values up by pathway name rather than by row position: positional
  # indexing depends on the row order of the fgsea result and picks the wrong
  # row (or NA) when one of the gene sets is dropped from the result.
  pval_up <- fgsea_bulk$pval[match("UP_in_KO", fgsea_bulk$pathway)]
  pval_down <- fgsea_bulk$pval[match("UP_in_Ctrl", fgsea_bulk$pathway)]

  plot_up <- plotEnrichment(treg_ko_up, ranks) +
    labs(
      title = paste(ko, "UP in KO"),
      subtitle = paste0("p-value = ", format.pval(pval_up, digits = 2))
    )
  plot_down <- plotEnrichment(treg_ko_down, ranks) +
    labs(
      title = paste(ko, "UP in Ctrl"),
      subtitle = paste0("p-value = ", format.pval(pval_down, digits = 2))
    )

  # NOTE(review): adding two ggplots with `+` needs a package such as
  # patchwork to be attached - presumably done by the sourced helper script.
  print(plot_up + plot_down)
}

In [None]:
### Unstim only

In [None]:
# Display the knockout DE table used by the GSEA loops.
df_all

In [None]:
options(repr.plot.width = 14, repr.plot.height = 4)

# For every knockout in `df_all`, restricted to rows with stim == "Unstim":
# build the "up in KO" and "up in ctrl" gene sets, test both with fgsea along
# `ranks`, then plot and save the enrichment curve of the "up in ctrl" set.
# NOTE(review): the filters use a `stim` column; the DE loop visible in this
# notebook only adds `knockout`, so `df_all` must have been built or loaded
# with a `stim` column elsewhere - confirm.
for (ko in levels(factor(df_all$knockout))) {
  treg_ko_up <- df_all %>%
    filter(knockout == ko & cluster == ko & avg_log2FC > 0 & stim == "Unstim") %>%
    pull(gene)
  treg_ko_down <- df_all %>%
    filter(knockout == ko & cluster == "ctrl" & avg_log2FC > 0 & stim == "Unstim") %>%
    pull(gene)

  fgsea_bulk <- fgsea(
    pathways = list(UP_in_KO = treg_ko_up, UP_in_Ctrl = treg_ko_down),
    stats = ranks
  )

  # Look p-values up by pathway name rather than by row position: positional
  # indexing depends on the row order of the fgsea result and picks the wrong
  # row (or NA) when one of the gene sets is dropped from the result.
  pval_up <- fgsea_bulk$pval[match("UP_in_KO", fgsea_bulk$pathway)]
  pval_down <- fgsea_bulk$pval[match("UP_in_Ctrl", fgsea_bulk$pathway)]

  # Plot of the "up in KO" set, kept disabled as in the original analysis:
  # print(plotEnrichment(treg_ko_up, ranks) +
  #   labs(title = paste(ko, "UP in KO, unstim"),
  #        subtitle = paste0("p-value = ", format.pval(pval_up, digits = 2))))

  # Fixed y-axis limits so the curves of all knockouts share one scale.
  plot_down <- plotEnrichment(treg_ko_down, ranks) +
    ylim(-0.62, 0.15) +
    labs(
      title = paste(ko, "UP in Ctrl, unstim"),
      subtitle = paste0("p-value = ", format.pval(pval_down, digits = 2))
    )
  print(plot_down)

  # Build the file names without separators: paste() with its default
  # sep = " " produced names such as "../figures/treg/ foxp3 .png".
  # The plot is passed explicitly instead of relying on the last plot drawn.
  ggsave(
    filename = file.path("../figures/treg", paste0(ko, ".png")),
    plot = plot_down, width = 12, height = 9, units = "cm"
  )
  ggsave(
    filename = file.path("../figures/treg", paste0(ko, ".svg")),
    plot = plot_down, width = 12, height = 9, units = "cm"
  )
}

In [None]:
# For every knockout in `df_all`, restricted to rows with stim == "IL12":
# build the "up in KO" and "up in ctrl" gene sets, test both with fgsea along
# `ranks`, and show the two enrichment curves side by side with their p-values.
# NOTE(review): the filters use a `stim` column; the DE loop visible in this
# notebook only adds `knockout`, so `df_all` must have been built or loaded
# with a `stim` column elsewhere - confirm.
for (ko in levels(factor(df_all$knockout))) {
  treg_ko_up <- df_all %>%
    filter(knockout == ko & cluster == ko & avg_log2FC > 0 & stim == "IL12") %>%
    pull(gene)
  treg_ko_down <- df_all %>%
    filter(knockout == ko & cluster == "ctrl" & avg_log2FC > 0 & stim == "IL12") %>%
    pull(gene)

  fgsea_bulk <- fgsea(
    pathways = list(UP_in_KO = treg_ko_up, UP_in_Ctrl = treg_ko_down),
    stats = ranks
  )

  # Look p-values up by pathway name rather than by row position: positional
  # indexing depends on the row order of the fgsea result and picks the wrong
  # row (or NA) when one of the gene sets is dropped from the result.
  pval_up <- fgsea_bulk$pval[match("UP_in_KO", fgsea_bulk$pathway)]
  pval_down <- fgsea_bulk$pval[match("UP_in_Ctrl", fgsea_bulk$pathway)]

  plot_up <- plotEnrichment(treg_ko_up, ranks) +
    labs(
      title = paste(ko, "UP in KO, IL12"),
      subtitle = paste0("p-value = ", format.pval(pval_up, digits = 2))
    )
  plot_down <- plotEnrichment(treg_ko_down, ranks) +
    labs(
      title = paste(ko, "UP in Ctrl, IL12"),
      subtitle = paste0("p-value = ", format.pval(pval_down, digits = 2))
    )

  # NOTE(review): adding two ggplots with `+` needs a package such as
  # patchwork to be attached - presumably done by the sourced helper script.
  print(plot_up + plot_down)
}