# Part 10: DEG analysis 

In this document, we will focus on the analysis of differentially expressed genes (DEG). We have already obtained the DEG in parts 6 and 7 of the analysis. The results will be loaded and built upon.  

In [None]:
source("diabetes_analysis_v07.R")

# Add a ranking `score` column to a table of marker statistics.
#   score = -log(p_val_adj) * avg_log2FC * (pct.1 / pct.2)
# A tiny constant is added to p_val_adj and to pct.2 before log / division.
# df must contain the columns p_val_adj, avg_log2FC, pct.1 and pct.2.
# Returns df with the extra `score` column.
rank_score_func <- function(df) {
  mutate(
    df,
    score = -1 * log(p_val_adj + (10^-310)) * avg_log2FC *
      (pct.1 / (pct.2 + 10^-300))
  )
}



We will be using some additional libraries:

In [None]:
library(msigdbr)
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(EnsDb.Hsapiens.v86)
library(fgsea)
library(pheatmap)

Formatting of plots in ggplot:

In [None]:
# Shared text sizing for the ggplot figures of this notebook.
# Returns a theme() object that sets axis text, axis titles, general text and
# legend text to size 20 (general text in black) and legend keys to 10 points.
ggtheme <- function() {
  font_size <- 20
  theme(
    text = element_text(size = font_size, colour = "black"),
    axis.title = element_text(size = font_size),
    axis.text = element_text(size = font_size),
    legend.text = element_text(size = font_size),
    legend.key.size = unit(10, units = "points")
  )
}

# Load markers, add EnsID

We will now load the markers we calculated in parts 6 and 7 of the analysis pipeline. The precalculated gene tables can also be downloaded from Zenodo: [DOI: 10.5281/zenodo.14222418](https://zenodo.org/records/14222418) 

In [None]:
# Read the precalculated marker tables, one for CD4 and one for CD8 T cells.
markers_cd4 <- read_csv(file = "../tables/de_genes/240319_cd4_all_markers_without_sex.csv")
markers_cd8 <- read_csv(file = "../tables/de_genes/240319_cd8_all_markers_without_sex.csv")

As we have the sample information stored, we can bind all markers together. 

In [None]:
# Stack the CD4 and CD8 marker tables row-wise into one table.
all_markers  <- rbind(markers_cd4, markers_cd8)

In [None]:
# Number of marker rows per value of the `source` column.
all_markers$source  %>% table

We will now clean the metadata of the DEG a bit and add the Up or Down direction, which indicates the direction of the change. Up means a gene is expressed more in the worse condition, i.e. T1D compared to Healthy, T1D T0 compared to T1D T1, no partial remission at T1 compared to partial remission at T1, etc. 

In [None]:
# Label every marker row with the direction of change.
# "Up"   = the row's cluster is the first-listed ("worse") group of its test_type,
# "Down" = the row's cluster is the other group of that test_type.
# Rows matching none of the listed test_type / cluster pairs get NA.
all_markers <- all_markers %>%
  mutate(
    direction = case_when(
      (test_type == "Dia T1 vs Ctrl T0" & cluster == "Dia T1") |
        (test_type == "Dia T0 vs Ctrl T0" & cluster == "Dia T0") |
        (test_type == "Dia T0 vs Dia T1" & cluster == "Dia T0") |
        (test_type == "PR_0 T0 vs PR_0 T1" & cluster == "PR_0 T0") |
        (test_type == "PR_0 T0 vs PR_1 T0" & cluster == "PR_0 T0") |
        (test_type == "PR_0 T1 vs PR_1 T1" & cluster == "PR_0 T1") |
        (test_type == "PR_1 T0 vs PR_1 T1" & cluster == "PR_1 T0") |
        (test_type == "Keto_1 T0 vs Keto_0 T0" & cluster == "Keto_1 T0") ~ "Up",
      (test_type == "Dia T1 vs Ctrl T0" & cluster == "Ctrl T0") |
        (test_type == "Dia T0 vs Ctrl T0" & cluster == "Ctrl T0") |
        (test_type == "Dia T0 vs Dia T1" & cluster == "Dia T1") |
        (test_type == "PR_0 T0 vs PR_0 T1" & cluster == "PR_0 T1") |
        (test_type == "PR_0 T0 vs PR_1 T0" & cluster == "PR_1 T0") |
        (test_type == "PR_0 T1 vs PR_1 T1" & cluster == "PR_1 T1") |
        (test_type == "PR_1 T0 vs PR_1 T1" & cluster == "PR_1 T1") |
        (test_type == "Keto_1 T0 vs Keto_0 T0" & cluster == "Keto_0 T0") ~ "Down"
    )
  )

In [None]:
# Sanity check: number of marker rows per cluster / test_type / direction.
all_markers  %>% group_by(cluster, test_type, direction)  %>% tally

Add Entrez gene IDs (looked up by gene symbol in the Ensembl annotation package) for each gene, as they are needed for some analyses.

In [None]:
# Look up SYMBOL and ENTREZID in EnsDb.Hsapiens.v86 for every unique marker gene.
geneIDs1 <- ensembldb::select(
  EnsDb.Hsapiens.v86,
  keys = unique(all_markers$gene),
  keytype = "SYMBOL",
  columns = c("SYMBOL", "ENTREZID")
)

In [None]:
# Rename the columns so that `gene` matches the column name in the marker table.
names(geneIDs1) <- c("gene", "entrezid")

In [None]:
# Attach the Entrez IDs to the marker table.
# The join key is stated explicitly so the join cannot silently pick up other
# shared column names and does not print the "Joining with `by = ...`" message.
all_markers2 <- all_markers %>%
  left_join(geneIDs1, by = "gene")

In [None]:
all_markers2

In [None]:
# Combined "GENE Direction" label, used later to count gene/direction pairs.
all_markers$gene_direction <- paste(all_markers$gene, all_markers$direction)

In [None]:
all_markers2$gene_direction <- paste(all_markers2$gene, all_markers2$direction)

# Counts of DE genes

### RNA

Let's visualize the counts of DEG in all clusters.

In [None]:
options(repr.plot.width = 10, repr.plot.height = 5)
# Bar chart: number of RNA marker rows per dataset, stacked by comparison.
all_markers %>%
  group_by(cluster, source, test_type, dataset) %>%
  dplyr::filter(source == "scRNAseq_RNA") %>%
  ggplot(aes(x = dataset)) +
  geom_bar(aes(fill = test_type), position = "stack") +
  coord_flip()

Split up different comparisons:

In [None]:
options(repr.plot.width = 20, repr.plot.height = 5)
# One panel per comparison (the C-peptide comparison is left out),
# bars stacked by the cluster in which the gene is a marker.
all_markers %>%
  group_by(cluster, source, test_type, dataset) %>%
  dplyr::filter(
    source == "scRNAseq_RNA",
    test_type != "Cpept_HI_T1 vs Cpept_LO_T1"
  ) %>%
  ggplot(aes(x = dataset)) +
  facet_grid(cols = vars(test_type)) +
  geom_bar(aes(fill = cluster), position = "stack") +
  coord_flip()

Add a bit of formatting.

In [None]:
# Per-comparison DEG counts, coloured by direction, with readable dataset labels.
# No group_by() here: grouping is not needed for the bar chart, and dplyr
# rejects mutate() on a grouping column (`dataset` was one of them).
all_markers %>%
  mutate(dataset = stringr::str_to_upper(gsub(dataset, pattern = "_", replacement = " "))) %>%
  dplyr::filter(source == "scRNAseq_RNA" & test_type != "Cpept_HI_T1 vs Cpept_LO_T1") %>%
  ggplot(aes(x = dataset)) +
  facet_grid(cols = vars(test_type)) +
  geom_bar(aes(fill = direction), position = "stack") +
  coord_flip() +
  # theme_bw() is a complete theme: it has to come before ggtheme(), otherwise
  # it replaces the text sizes that ggtheme() sets.
  theme_bw() +
  ggtheme() +
  # Named values keep Down = blue and Up = red even if one direction is absent.
  scale_fill_manual(values = c("Down" = "#90c2f1ff", "Up" = "#d0625dff")) +
  theme(axis.ticks.y = element_blank())



In [None]:
# Same plot restricted to a subset of comparisons (C-peptide and three of the
# partial-remission comparisons are left out). This is the figure saved below.
# No group_by() here: grouping is not needed for the bar chart, and dplyr
# rejects mutate() on a grouping column (`dataset` was one of them).
all_markers %>%
  mutate(dataset = stringr::str_to_upper(gsub(dataset, pattern = "_", replacement = " "))) %>%
  dplyr::filter(
    source == "scRNAseq_RNA" & test_type != "Cpept_HI_T1 vs Cpept_LO_T1" &
      test_type != "PR_0 T0 vs PR_0 T1" & test_type != "PR_0 T1 vs PR_1 T1" &
      test_type != "PR_1 T0 vs PR_1 T1"
  ) %>%
  ggplot(aes(x = dataset)) +
  facet_grid(cols = vars(test_type)) +
  geom_bar(aes(fill = direction), position = "stack") +
  coord_flip() +
  # theme_bw() is a complete theme: it has to come before ggtheme(), otherwise
  # it replaces the text sizes that ggtheme() sets.
  theme_bw() +
  ggtheme() +
  # Named values keep Down = blue and Up = red even if one direction is absent.
  scale_fill_manual(values = c("Down" = "#90c2f1ff", "Up" = "#d0625dff")) +
  theme(axis.ticks.y = element_blank())



In [None]:
library(svglite)
# Create the output folder (including missing parent folders); stay silent if
# it already exists so that re-running the notebook does not warn.
dir.create("../figures/DE_genes/", recursive = TRUE, showWarnings = FALSE)
# ggsave() without `plot =` writes the most recently displayed ggplot.
ggsave(filename = "../figures/DE_genes/count_of_RNA_markers.svg", width = 11, height = 4)

### CollecTRI

We can do the same for the differentially active transcription factors estimated by the decoupleR package:

In [None]:
# CollecTRI rows per dataset, stacked by comparison.
all_markers %>%
  group_by(cluster, source, test_type, dataset) %>%
  dplyr::filter(source == "scRNAseq_collecTRI") %>%
  ggplot(aes(x = dataset)) +
  geom_bar(aes(fill = test_type), position = "stack") +
  coord_flip()

In [None]:
# One panel per comparison, stacked by cluster.
all_markers %>%
  group_by(cluster, source, test_type, dataset) %>%
  dplyr::filter(source == "scRNAseq_collecTRI") %>%
  ggplot(aes(x = dataset)) +
  facet_grid(cols = vars(test_type)) +
  geom_bar(aes(fill = cluster), position = "stack") +
  coord_flip()

In [None]:
# How often each gene/direction pair occurs among the CollecTRI rows, most frequent first.
all_markers %>%
  dplyr::filter(source == "scRNAseq_collecTRI") %>%
  mutate(gene_direction = paste(gene, direction)) %>%
  group_by(gene, gene_direction) %>%
  tally() %>%
  arrange(desc(n))

# Genes with similar DE trend in T0 and T1

In this part, we will look at the markers and check which of them are consistently up- or downregulated at both T0 and T1. 

## Overlap of DEG in T0 and T1

First, we will see which genes show up the most times in our DE results. 

In [None]:
i <- 1

In [None]:
# First dataset: gene/direction pairs that occur more than once (n > 1) among
# the RNA markers of the two T1D-vs-control contrasts.
mrk_clust <- all_markers %>%
  dplyr::filter(source == "scRNAseq_RNA") %>%
  mutate(gene_direction = paste(gene, direction)) %>%
  dplyr::filter(
    test_type %in% c("Dia T0 vs Ctrl T0", "Dia T1 vs Ctrl T0"),
    dataset == datasets[1]
  ) %>%
  group_by(gene_direction) %>%
  tally() %>%
  arrange(desc(n)) %>%
  dplyr::filter(n > 1)
mrk_clust$dataset <- datasets[1]

In [None]:
# Start the accumulated table with the first dataset.
mrk_clust_all <- mrk_clust

In [None]:
# Repeat the tally for every remaining dataset and append the results.
# seq_along(datasets)[-1] is empty when there is only one dataset, whereas
# 2:length(datasets) would then iterate over 2 and 1. The per-dataset tables
# are collected in a list and bound once instead of growing with rbind().
mrk_clust_all <- bind_rows(
  mrk_clust_all,
  lapply(seq_along(datasets)[-1], function(i) {
    all_markers %>%
      dplyr::filter(source == "scRNAseq_RNA") %>%
      mutate(gene_direction = paste(gene, direction)) %>%
      dplyr::filter(
        test_type %in% c("Dia T0 vs Ctrl T0", "Dia T1 vs Ctrl T0"),
        dataset == datasets[i]
      ) %>%
      group_by(gene_direction) %>%
      tally() %>%
      arrange(desc(n)) %>%
      dplyr::filter(n > 1) %>%
      mutate(dataset = datasets[i])
  })
)



In [None]:
# Number of gene/direction/dataset rows collected across all datasets.
mrk_clust_all  %>% nrow

In this table, we show the DEG from our analysis with the direction of change and the counts it occurs among significantly up/down regulated genes when looking at the contrasts `"Dia T0 vs Ctrl T0"` and `"Dia T1 vs Ctrl T0"` across all the subpopulations we have defined.  

In [None]:
# Number of datasets in which each gene/direction pair was retained,
# split back into gene and direction; only the "Down" rows are shown.
mrk_clust_all %>%
  group_by(gene_direction) %>%
  tally() %>%
  arrange(desc(n)) %>%
  separate(
    gene_direction,
    into = c("gene", "direction"),
    sep = " ", remove = FALSE
  ) %>%
  dplyr::filter(direction == "Down")

Let's see what are the top downregulated genes (down in T1D compared to healthy).

In [None]:
# Gene symbols of the retained "Down" pairs, most frequent first.
down_genes <- mrk_clust_all %>%
  group_by(gene_direction) %>%
  tally() %>%
  arrange(desc(n)) %>%
  separate(
    gene_direction,
    into = c("gene", "direction"),
    sep = " ", remove = FALSE
  ) %>%
  dplyr::filter(direction == "Down") %>%
  pull(gene)

In [None]:
down_genes

Let's see what are the top upregulated genes (up in T1D compared to healthy).

In [None]:
# Gene symbols of the retained "Up" pairs, most frequent first.
up_genesd <- mrk_clust_all %>%
  group_by(gene_direction) %>%
  tally() %>%
  arrange(desc(n)) %>%
  separate(
    gene_direction,
    into = c("gene", "direction"),
    sep = " ", remove = FALSE
  ) %>%
  dplyr::filter(direction == "Up") %>%
  pull(gene)

In [None]:
up_genesd

## Genes in T1 and T0 using fold changes

To plot all genes, we will use fold changes rather than the list of DEG, as we are also interested in the direction of change, even if it is not significant. 

We will load the fold changes calculated in parts 06 and 07 of the analysis. The precalculated values are saved also on Zenodo. 

In [None]:
set_path_to_wd <- "../../240218_VN_Diabetes_V05/"

In [None]:
# Full paths and bare file names of the fold-change tables.
paths_fc <- list.files(paste0(set_path_to_wd, "tables/fold_change"), full.names = TRUE)
dataset_names <- list.files(paste0(set_path_to_wd, "tables/fold_change"), full.names = FALSE)

In [None]:
paths_fc

In [None]:
# Keep only the entries whose path contains "Ctrl".
paths_fc <- paths_fc[grepl("Ctrl", paths_fc)]

In [None]:
# Same filter applied to the bare file names.
dataset_names <- dataset_names[grepl("Ctrl", dataset_names)]

In [None]:
dataset_names

In [None]:
paths_fc

We will create one table containing the fold changes from the DiaT0_vs_Ctrl contrast. 

In [None]:
# Read fold-change table number `j` and prepare it for the DiaT0_vs_Ctrl contrast.
#
# j: index into the global vectors `paths_fc` (file to read) and
#    `dataset_names` (file name used to derive the dataset label).
# Returns a data.frame sorted by decreasing avg_log2FC, restricted to rows whose
# `cluster` contains "Dia", with two added columns:
#   dataset    - file name without ".csv" and without "_Ctrl_vs_DiaT0"
#   comaprison - the constant "DiaT0_vs_Ctrl" (column name spelling is kept
#                because later cells refer to it)
all_fcs1 <- function(j) {
  print(j)
  fc.df <- read_csv(paths_fc[j]) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster, pattern = "Dia"))
  # Remove only a literal ".csv" extension at the end of the name
  # (an unescaped "." would match any character).
  dataset_label <- sub("\\.csv$", "", dataset_names[j])
  dataset_label <- gsub(dataset_label, pattern = "_Ctrl_vs_DiaT0", replacement = "")
  # rep() keeps the assignments valid when no rows are left after filtering.
  fc.df$dataset <- rep(dataset_label, nrow(fc.df))
  fc.df$comaprison <- rep("DiaT0_vs_Ctrl", nrow(fc.df))

  fc.df
}

In [None]:
# Select the DiaT0 tables by file name instead of the hard-coded positions
# 1, 3, ..., 41, which silently assumed exactly 42 alternating files.
all_fcs1_result <- map(
  which(grepl("Ctrl_vs_DiaT0", dataset_names, fixed = TRUE)),
  all_fcs1
)

In [None]:
all_fcs_result <- bind_rows(all_fcs1_result)

We will create another table containing the fold changes from the DiaT1_vs_Ctrl contrast. 

In [None]:
# Read fold-change table number `j` and prepare it for the DiaT1_vs_Ctrl contrast.
#
# j: index into the global vectors `paths_fc` (file to read) and
#    `dataset_names` (file name used to derive the dataset label).
# Returns a data.frame sorted by decreasing avg_log2FC, restricted to rows whose
# `cluster` contains "Dia", with two added columns:
#   dataset    - file name without ".csv" and without "_Ctrl_vs_DiaT1"
#   comaprison - the constant "DiaT1_vs_Ctrl" (column name spelling is kept
#                because later cells refer to it)
all_fcs2 <- function(j) {
  print(j)
  fc.df <- read_csv(paths_fc[j]) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster, pattern = "Dia"))
  # Remove only a literal ".csv" extension at the end of the name
  # (an unescaped "." would match any character).
  dataset_label <- sub("\\.csv$", "", dataset_names[j])
  dataset_label <- gsub(dataset_label, pattern = "_Ctrl_vs_DiaT1", replacement = "")
  # rep() keeps the assignments valid when no rows are left after filtering.
  fc.df$dataset <- rep(dataset_label, nrow(fc.df))
  fc.df$comaprison <- rep("DiaT1_vs_Ctrl", nrow(fc.df))

  fc.df
}

In [None]:
# Select the DiaT1 tables by file name instead of the hard-coded positions
# 2, 4, ..., 42, which silently assumed exactly 42 alternating files.
all_fcs2_result <- map(
  which(grepl("Ctrl_vs_DiaT1", dataset_names, fixed = TRUE)),
  all_fcs2
)

In [None]:
all_fcs2_result <- bind_rows(all_fcs2_result)

Now let's bind the two tables together. 

In [None]:
# Long table with both contrasts; the `comaprison` column tells them apart.
all_fcs_result  <- rbind(all_fcs_result, all_fcs2_result)

Now we can visualize the top up/down regulated genes in both time points with all fold change values. 

In [None]:
options(repr.plot.height = 7, repr.plot.width = 25)
# Dot plot of the genes from up_genesd / down_genes: one column per gene, one row
# per dataset, panels split by direction (columns) and contrast (rows).
# Colour = avg_log2FC, size = -log10 adjusted p, faded when adjusted p >= 0.05.
all_fcs_result %>%
  mutate(
    is_shared = gene %in% up_genesd | gene %in% down_genes,
    direction_t0_t1 = ifelse(
      gene %in% up_genesd, "UP",
      ifelse(gene %in% down_genes, "DOWN", NA_character_)
    )
  ) %>%
  dplyr::filter(is_shared == TRUE) %>%
  ggplot(aes(factor(gene), dataset)) +
  facet_grid(
    cols = vars(direction_t0_t1), rows = vars(comaprison),
    space = "free", scales = "free"
  ) +
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = avg_log2FC, alpha = p_val_adj < 0.05)) +
  theme_bw() +
  scale_size_continuous(range = c(1, 4)) +
  scale_alpha_manual(values = c(0.4, 1)) +
  scale_colour_gradient2(low = "blue", mid = "white", high = "red") +
  theme(axis.text.x = element_text(angle = 90))

Another visualization will be the logFC/logFC plot, which compares the logFC at T0 and at T1. 

Set the colors.

In [None]:
# Named colour palette: one hex colour per population name, plus a "grey" entry.
colors_of_populations <- c(
  "cd4_l1_full" = "#ffa66aff",
  "cd4_l2_subcluster" = "#ffa66aff",
  "cd4_l2_unc" = "#cd3333ff",
  "cd4_l3_isaghi" = "#a46464ff",
  "cd4_l3_naive" = "#a58413ff",
  "cd4_l3_nfkb" = "#a46c43ff",
  "cd4_l3_proliferating" = "#9b3097ff",
  "cd4_l3_temra" = "#7d252aff",
  "cd4_l3_tfh" = "#8e6a1dff",
  "cd4_l3_th1th17" = "#a94e4eff",
  "cd4_l3_th2" = "#b36672ff",
  "cd4_l3_treg" = "#ba6d8dff",
  "cd8_l1_full" = "#67934aff",
  "cd8_l2_nk" = "#105292ff",
  "cd8_l2_subcluster" = "#67934aff",
  "cd8_l2_unc" = "#6d00c0ff",
  "cd8_l3_naive" = "#004455ff",
  "cd8_l3_prolif" = "#5f711aff",
  "cd8_l3_tcm" = "#427c39ff",
  "cd8_l3_tem" = "#004455ff",
  "cd8_l3_temra" = "#16481fff",
  "grey" = "grey88"
)

Plot the logFC/logFC plot for the main datasets of CD4 and CD8 T cells. 

In [None]:
options(repr.plot.height = 12, repr.plot.width = 14)

# logFC at T0 (x) against logFC at T1 (y), both versus controls, for the full
# CD4 and CD8 datasets. Only rows with adjusted p < 0.05 are kept. A gene is
# labelled when both logFCs are > 0.15 or both are < -0.2.
all_fcs_result %>%
  dplyr::filter(
    gene %in% all_markers$gene,
    p_val_adj < 0.05,
    !grepl("MTRN", gene),
    dataset %in% c("cd4_l1_full_filt", "cd8_l1_full_filt")
  ) %>%
  dplyr::select(avg_log2FC, gene, dataset, comaprison) %>%
  pivot_wider(names_from = "comaprison", values_from = avg_log2FC) %>%
  mutate(
    gene_in_genes = ifelse(
      (DiaT0_vs_Ctrl > 0.15 & DiaT1_vs_Ctrl > 0.15) |
        (DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2),
      gene, NA_character_
    )
  ) %>%
  ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset)) +
  scale_color_manual(values = c("#ffa66aff", "#67934aff")) +
  geom_point(aes(shape = dataset)) +
  scale_shape_manual(values = c(21, 22)) +
  ggrepel::geom_text_repel() +
  theme_bw() +
  ggtheme()
ggsave(filename = "../figures/DE_genes/corr_DE_t0_t1.svg", width = 14, height = 12)

Just CD4. 

In [None]:
# Same logFC/logFC plot for the full CD4 dataset only, with a wider set of
# labelling rules (any single rule being met labels the gene).
all_fcs_result %>%
  dplyr::filter(
    gene %in% all_markers$gene,
    p_val_adj < 0.05,
    !grepl("MTRN", gene),
    dataset %in% c("cd4_l1_full_filt")
  ) %>%
  dplyr::select(avg_log2FC, gene, dataset, comaprison) %>%
  pivot_wider(names_from = "comaprison", values_from = avg_log2FC) %>%
  mutate(
    gene_in_genes = ifelse(
      (DiaT0_vs_Ctrl > 0.10 & DiaT1_vs_Ctrl > 0.10) |
        (DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2) |
        (DiaT0_vs_Ctrl < -0.1 & DiaT1_vs_Ctrl > 0.1) |
        (DiaT0_vs_Ctrl > 0.1 & DiaT1_vs_Ctrl < -0.1) |
        DiaT1_vs_Ctrl < -0.5 |
        DiaT1_vs_Ctrl > 0.10,
      gene, NA_character_
    )
  ) %>%
  ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset)) +
  scale_color_manual(values = c("#d45500ff")) +
  geom_point(aes(shape = dataset)) +
  scale_shape_manual(values = c(21, 22)) +
  ggrepel::geom_text_repel() +
  theme_bw() +
  ggtheme()
ggsave(filename = "../figures/DE_genes/corr_DE_t0_t1_cd4.svg", width = 14, height = 12)

Just CD8. 

In [None]:
# Same logFC/logFC plot for the full CD8 dataset only, with its own set of
# labelling rules (any single rule being met labels the gene).
all_fcs_result %>%
  dplyr::filter(
    gene %in% all_markers$gene,
    p_val_adj < 0.05,
    !grepl("MTRN", gene),
    dataset %in% c("cd8_l1_full_filt")
  ) %>%
  dplyr::select(avg_log2FC, gene, dataset, comaprison) %>%
  pivot_wider(names_from = "comaprison", values_from = avg_log2FC) %>%
  mutate(
    gene_in_genes = ifelse(
      (DiaT0_vs_Ctrl > 0.20 & DiaT1_vs_Ctrl > 0.20) |
        (DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2) |
        (DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl > 0.2) |
        (DiaT0_vs_Ctrl > 0.1 & DiaT1_vs_Ctrl < -0.1) |
        DiaT1_vs_Ctrl < -0.25 |
        DiaT0_vs_Ctrl < -0.3 |
        DiaT1_vs_Ctrl < -0.1,
      gene, NA_character_
    )
  ) %>%
  ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset)) +
  scale_color_manual(values = c("#67934aff")) +
  geom_point(aes(shape = dataset)) +
  scale_shape_manual(values = c(21, 22)) +
  ggrepel::geom_text_repel() +
  theme_bw() +
  ggtheme()
ggsave(filename = "../figures/DE_genes/corr_DE_t0_t1_cd8.svg", width = 14, height = 12)

For the L2 populations, we will create one plot for CD4 and one plot for CD8 cells. 

In [None]:
# Named colour palette: one hex colour per population name, plus a "grey" entry.
# (Same values as the earlier definition of colors_of_populations.)
colors_of_populations <- c(
  "cd4_l1_full" = "#ffa66aff",
  "cd4_l2_subcluster" = "#ffa66aff",
  "cd4_l2_unc" = "#cd3333ff",
  "cd4_l3_isaghi" = "#a46464ff",
  "cd4_l3_naive" = "#a58413ff",
  "cd4_l3_nfkb" = "#a46c43ff",
  "cd4_l3_proliferating" = "#9b3097ff",
  "cd4_l3_temra" = "#7d252aff",
  "cd4_l3_tfh" = "#8e6a1dff",
  "cd4_l3_th1th17" = "#a94e4eff",
  "cd4_l3_th2" = "#b36672ff",
  "cd4_l3_treg" = "#ba6d8dff",
  "cd8_l1_full" = "#67934aff",
  "cd8_l2_nk" = "#105292ff",
  "cd8_l2_subcluster" = "#67934aff",
  "cd8_l2_unc" = "#6d00c0ff",
  "cd8_l3_naive" = "#004455ff",
  "cd8_l3_prolif" = "#5f711aff",
  "cd8_l3_tcm" = "#427c39ff",
  "cd8_l3_tem" = "#004455ff",
  "cd8_l3_temra" = "#16481fff",
  "grey" = "grey88"
)

Plot L2 subpopulations in CD4 T cells. 

In [None]:
# logFC/logFC plot for every dataset whose name contains "cd4_l3",
# coloured with colors_of_populations.
all_fcs_result %>%
  dplyr::filter(
    gene %in% all_markers$gene,
    p_val_adj < 0.05,
    !grepl("MTRN", gene),
    grepl("cd4_l3", dataset)
  ) %>%
  dplyr::select(avg_log2FC, gene, dataset, comaprison) %>%
  pivot_wider(names_from = "comaprison", values_from = avg_log2FC) %>%
  mutate(
    gene_in_genes = ifelse(
      (DiaT0_vs_Ctrl > 0.15 & DiaT1_vs_Ctrl > 0.15) |
        (DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2),
      gene, NA_character_
    )
  ) %>%
  ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset)) +
  scale_color_manual(values = colors_of_populations) +
  geom_point(aes(shape = dataset)) +
  scale_shape_manual(values = c(21:25, 21:25)) +
  ggrepel::geom_text_repel() +
  theme_bw() +
  ggtheme()
ggsave(filename = "../figures/DE_genes/corr_DE_cd4_pop_t0_t1.svg", width = 14, height = 12)

Plot L2 subpopulations in CD8 T cells. 

In [None]:
# logFC/logFC plot for every dataset whose name contains "cd8_l3",
# coloured with colors_of_populations.
all_fcs_result %>%
  dplyr::filter(
    gene %in% all_markers$gene,
    p_val_adj < 0.05,
    !grepl("MTRN", gene),
    grepl("cd8_l3", dataset)
  ) %>%
  dplyr::select(avg_log2FC, gene, dataset, comaprison) %>%
  pivot_wider(names_from = "comaprison", values_from = avg_log2FC) %>%
  mutate(
    gene_in_genes = ifelse(
      (DiaT0_vs_Ctrl > 0.15 & DiaT1_vs_Ctrl > 0.15) |
        (DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2),
      gene, NA_character_
    )
  ) %>%
  ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset)) +
  scale_color_manual(values = colors_of_populations) +
  geom_point(aes(shape = dataset)) +
  scale_shape_manual(values = c(21:25, 21:25)) +
  ggrepel::geom_text_repel() +
  theme_bw() +
  ggtheme()
ggsave(filename = "../figures/DE_genes/corr_DE_cd8_pop_t0_t1.svg", width = 14, height = 12)

In [None]:
options(repr.plot.height = 10, repr.plot.width = 13)

# Full CD4 and CD8 datasets again, now labelling genes in all four quadrants
# (logFC > 0.15 or < -0.2 at T0, combined with > 0.15 or < -0.2 at T1).
all_fcs_result %>%
  dplyr::filter(
    gene %in% all_markers$gene,
    p_val_adj < 0.05,
    !grepl("MTRN", gene),
    dataset %in% c("cd4_l1_full_filt", "cd8_l1_full_filt")
  ) %>%
  dplyr::select(avg_log2FC, gene, dataset, comaprison) %>%
  pivot_wider(names_from = "comaprison", values_from = avg_log2FC) %>%
  mutate(
    gene_in_genes = ifelse(
      (DiaT0_vs_Ctrl > 0.15 & DiaT1_vs_Ctrl > 0.15) |
        (DiaT0_vs_Ctrl > 0.15 & DiaT1_vs_Ctrl < -0.2) |
        (DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl > 0.15) |
        (DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2),
      gene, NA_character_
    )
  ) %>%
  ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset)) +
  scale_color_manual(values = c("#ffa66aff", "#67934aff")) +
  geom_point(aes(shape = dataset)) +
  scale_shape_manual(values = c(21, 22)) +
  ggrepel::geom_text_repel() +
  theme_bw() +
  ggtheme()

# Heatmap markers in populations

In this section, we will create heatmaps showing the DEG between T1D and healthy in the main populations. 

For each contrast and each l2 subpopulation, we will take the top 5 genes and create a heatmap showing the logFCs of these genes. Statistical significance will be shown based on the adjusted p-value.

## CD4

### T1D T0 vs Ctrl

We will check the size of the union of the top 5 DEG in the subpopulations and save these marker genes to a variable called `hm`. 

In [None]:
# Size of the union of the per-dataset top 5 genes (highest avg_log2FC) among the
# CD4 RNA markers of "Dia T0 vs Ctrl T0"; the l2_subcluster datasets are left out.
all_markers %>%
  separate(dataset, into = c("cd4_cd8", "Level", NA, NA), sep = "_", remove = FALSE) %>%
  dplyr::filter(
    cd4_cd8 == "cd4",
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster"))
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub(dataset, pattern = "_", replacement = " "))) %>%
  dplyr::filter(source == "scRNAseq_RNA", test_type == "Dia T0 vs Ctrl T0") %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique() %>%
  length()

In [None]:
# The gene list itself, here additionally restricted to adjusted p < 0.05.
hm <- all_markers %>%
  separate(dataset, into = c("cd4_cd8", "Level", NA, NA), sep = "_", remove = FALSE) %>%
  dplyr::filter(
    cd4_cd8 == "cd4",
    p_val_adj < 0.05,
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster"))
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub(dataset, pattern = "_", replacement = " "))) %>%
  dplyr::filter(source == "scRNAseq_RNA", test_type == "Dia T0 vs Ctrl T0") %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique()

We will use a function to get the fold change for all of the hm genes. 

In [None]:
# Read fold-change table number `j` and keep the rows for the genes in `hm`.
#
# j: index into the global vectors `paths_fc` (file to read) and
#    `dataset_names` (file name, stored without ".csv" in the `dataset` column).
# Uses the global gene vector `hm`.
# The `cluster` rows that are kept depend on the file path:
#   path contains "Ctrl"    -> cluster contains "Dia"
#   path contains "PR_0_T0" -> cluster contains "PR_0 T0"
#   path contains "keto"    -> cluster contains "Keto_1 T0"
#   anything else           -> cluster contains "Dia T0"
# Returns a data.frame sorted by decreasing avg_log2FC.
get_fc_genes <- function(j) {
  print(j)
  path_j <- paths_fc[j]

  cluster_pattern <- if (grepl("Ctrl", path_j)) {
    "Dia"
  } else if (grepl("PR_0_T0", path_j)) {
    "PR_0 T0"
  } else if (grepl("keto", path_j)) {
    "Keto_1 T0"
  } else {
    "Dia T0"
  }

  fc.df <- read_csv(path_j) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster, pattern = cluster_pattern)) %>%
    dplyr::filter(gene %in% hm)

  # Remove only a literal ".csv" extension at the end of the name (an unescaped
  # "." would match any character); rep() keeps the assignment valid when none
  # of the `hm` genes is present in this table.
  fc.df$dataset <- rep(sub("\\.csv$", "", dataset_names[j]), nrow(fc.df))
  fc.df
}

In [None]:
# Fold-change tables of the V06 run: full paths and bare file names.
paths_fc <- list.files("../../240617_VN_Diabetes_V06/tables/fold_change/", full.names = TRUE)
dataset_names <- list.files("../../240617_VN_Diabetes_V06/tables/fold_change", full.names = FALSE)

# Populations to keep: anything containing "l3" plus the three listed l2 datasets.
paths_fc <- paths_fc[
  Reduce(`|`, lapply(c("l3", "cd8_l2_unc", "cd4_l2_unc", "cd8_l2_nk"), grepl, x = paths_fc))
]

dataset_names <- dataset_names[
  Reduce(`|`, lapply(c("l3", "cd8_l2_unc", "cd4_l2_unc", "cd8_l2_nk"), grepl, x = dataset_names))
]

# Contrasts to keep.
paths_fc <- paths_fc[
  Reduce(`|`, lapply(
    c("Ctrl_vs_DiaT0", "DiaT0_vs_DiaT1", "Ctrl_vs_DiaT1", "PR_0_T0_vs_PR_1_T0", "ketoacidosis"),
    grepl,
    x = paths_fc
  ))
]

dataset_names <- dataset_names[
  Reduce(`|`, lapply(
    c("Ctrl_vs_DiaT0", "DiaT0_vs_DiaT1", "Ctrl_vs_DiaT1", "PR_0_T0_vs_PR_1_T0", "ketoacidosis"),
    grepl,
    x = dataset_names
  ))
]



In [None]:
# seq_along() yields an empty sequence when no file matched, whereas
# 1:length(paths_fc) would yield 1, 0.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
fcs <- bind_rows(fcs)

We now have the full table, so we need to clean the data and plot the heatmap. 

In [None]:
# Population label = first 11 characters of the dataset (file) name.
fcs$population <- substr(fcs$dataset, 1, 11)

table(fcs$population)

# Labels containing "_nk_" are cut down to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(grepl("_nk_", population), substr(population, 1, 9), population)
  )

fcs$population2  %>% table

# Contrast label = the last 13 characters of the dataset name (n2 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )


fcs$test_type  %>% table

In [None]:
# Keep the Ctrl_vs_DiaT0 contrast.
# NOTE(review): both ifelse branches return avg_log2FC, so andjus_logFC only
# differs from avg_log2FC where p_val_adj is NA - confirm what was intended.
fcs2 <- fcs %>%
  dplyr::filter(test_type == "Ctrl_vs_DiaT0") %>%
  mutate(andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC))

Please note that we exclude genes whose symbol contains "MTRN" (mitochondrial genes), as they are very noisy. 

In [None]:
# Gene x population matrix of avg_log2FC (non-cd8 datasets, without "MTRN" genes).
fcs2_mtx <- fcs2 %>%
  dplyr::filter(!grepl("cd8", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(avg_log2FC, gene, population2) %>%
  pivot_wider(names_from = "population2", values_from = "avg_log2FC") %>%
  column_to_rownames("gene") %>%
  as.matrix()

We will define a function to recode statistical significance as a star. 

In [None]:
# Recode adjusted p-values as heatmap annotations.
# p: numeric vector of p-values.
# Returns a character vector of the same length: "*" where p < 0.05, "" otherwise.
# Missing p-values also give "" so that no "NA" text is printed in the heatmap cells.
pval_to_stars <- function(p) {
  ifelse(!is.na(p) & p < 0.05, "*", "")
}

# Build the star matrix with the same filters and layout as fcs2_mtx.
star_matrix <- fcs2 %>%
  dplyr::filter(!grepl("cd8", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(p_val_adj, gene, population2) %>%
  mutate(p_val_adj = pval_to_stars(p_val_adj)) %>%
  pivot_wider(names_from = "population2", values_from = "p_val_adj") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# Use exactly the row and column names of the logFC matrix.
rownames(star_matrix) <- rownames(fcs2_mtx)
colnames(star_matrix) <- colnames(fcs2_mtx)



Save the heatmap. 

In [None]:
options(repr.plot.height = 6, repr.plot.width = 3.5)
breaks <- seq(-2, 2, length.out = 100)
# Make sure the output folder exists before pheatmap writes the PDF.
dir.create("../figures/heatmaps/", recursive = TRUE, showWarnings = FALSE)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none", cluster_cols = TRUE, cluster_rows = TRUE,
  # color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white", display_numbers = star_matrix, breaks = breaks,
  # pheatmap expects one colour per interval between breaks, i.e. one colour
  # fewer than there are breaks; this also centres white on 0.
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  filename = "../figures/heatmaps/hm_deg_allclusters_cd4_diaT0_ctrl.pdf",
  width = 3.5, height = 7
)

Repeat this code for each contrast. 

### T1D T1 vs Ctrl

In [None]:
# Union of the per-dataset top 5 genes (highest avg_log2FC, adjusted p < 0.05)
# among the CD4 RNA markers of "Dia T1 vs Ctrl T0".
hm <- all_markers %>%
  separate(dataset, into = c("cd4_cd8", "Level", NA, NA), sep = "_", remove = FALSE) %>%
  dplyr::filter(
    cd4_cd8 == "cd4",
    p_val_adj < 0.05,
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster"))
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub(dataset, pattern = "_", replacement = " "))) %>%
  dplyr::filter(source == "scRNAseq_RNA", test_type == "Dia T1 vs Ctrl T0") %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique()

In [None]:
# seq_along() yields an empty sequence when no file matched, whereas
# 1:length(paths_fc) would yield 1, 0.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
fcs <- bind_rows(fcs)

In [None]:
# Population label = first 11 characters of the dataset (file) name.
fcs$population <- substr(fcs$dataset, 1, 11)

table(fcs$population)

# Labels containing "_nk_" are cut down to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(grepl("_nk_", population), substr(population, 1, 9), population)
  )

fcs$population2  %>% table

# Contrast label = the last 13 characters of the dataset name (n2 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )


fcs$test_type  %>% table

In [None]:
# Keep the Ctrl_vs_DiaT1 contrast.
# NOTE(review): both ifelse branches return avg_log2FC, so andjus_logFC only
# differs from avg_log2FC where p_val_adj is NA - confirm what was intended.
fcs2 <- fcs %>%
  dplyr::filter(test_type == "Ctrl_vs_DiaT1") %>%
  mutate(andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC))

In [None]:
# Gene x population matrix of avg_log2FC (non-cd8 datasets, without "MTRN" genes).
fcs2_mtx <- fcs2 %>%
  dplyr::filter(!grepl("cd8", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(avg_log2FC, gene, population2) %>%
  pivot_wider(names_from = "population2", values_from = "avg_log2FC") %>%
  column_to_rownames("gene") %>%
  as.matrix()

In [None]:
# Recode adjusted p-values as heatmap annotations: "*" where p < 0.05, "" otherwise.
# Missing p-values also give "" so that no "NA" text is printed in the heatmap cells.
pval_to_stars <- function(p) {
  ifelse(!is.na(p) & p < 0.05, "*", "")
}

# Build the star matrix with the same filters and layout as fcs2_mtx.
star_matrix <- fcs2 %>%
  dplyr::filter(!grepl("cd8", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(p_val_adj, gene, population2) %>%
  mutate(p_val_adj = pval_to_stars(p_val_adj)) %>%
  pivot_wider(names_from = "population2", values_from = "p_val_adj") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# Use exactly the row and column names of the logFC matrix.
rownames(star_matrix) <- rownames(fcs2_mtx)
colnames(star_matrix) <- colnames(fcs2_mtx)



In [None]:
# Draw the heatmap in the notebook (uses `breaks` defined in an earlier cell).
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none", cluster_cols = TRUE, cluster_rows = TRUE,
  # color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white", display_numbers = star_matrix, breaks = breaks,
  # pheatmap expects one colour fewer than there are breaks.
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  # filename = "../figures/heatmaps/hm_deg_allclusters_cd4_diaT1_ctrl.pdf",
  width = 3.5, height = 7
)

In [None]:
# Same heatmap, written to a PDF file.
options(repr.plot.height = 6, repr.plot.width = 3.5)
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none", cluster_cols = TRUE, cluster_rows = TRUE,
  # color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white", display_numbers = star_matrix, breaks = breaks,
  # pheatmap expects one colour fewer than there are breaks.
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  filename = "../figures/heatmaps/hm_deg_allclusters_cd4_diaT1_ctrl.pdf",
  width = 3.5, height = 7
)



### T1D T0 vs T1D T1

In [None]:
# Union of the per-dataset top 5 genes (highest avg_log2FC, adjusted p < 0.05)
# among the CD4 RNA markers of "Dia T0 vs Dia T1".
hm <- all_markers %>%
  separate(dataset, into = c("cd4_cd8", "Level", NA, NA), sep = "_", remove = FALSE) %>%
  dplyr::filter(
    cd4_cd8 == "cd4",
    p_val_adj < 0.05,
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster"))
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub(dataset, pattern = "_", replacement = " "))) %>%
  dplyr::filter(source == "scRNAseq_RNA", test_type == "Dia T0 vs Dia T1") %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique()

In [None]:
# seq_along() yields an empty sequence when no file matched, whereas
# 1:length(paths_fc) would yield 1, 0.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
fcs <- bind_rows(fcs)

In [None]:
# Population label = first 11 characters of the dataset (file) name.
fcs$population <- substr(fcs$dataset, 1, 11)

table(fcs$population)

# Labels containing "_nk_" are cut down to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(grepl("_nk_", population), substr(population, 1, 9), population)
  )

fcs$population2  %>% table

# Contrast label = the last 13 characters of the dataset name (n2 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )


fcs$test_type  %>% table

In [None]:
# Keep the DiaT0_vs_DiaT1 contrast. test_type holds only the last 13 characters
# of the dataset name, hence the label without its leading "D".
# NOTE(review): both ifelse branches return avg_log2FC, so andjus_logFC only
# differs from avg_log2FC where p_val_adj is NA - confirm what was intended.
fcs2 <- fcs %>%
  dplyr::filter(test_type == "iaT0_vs_DiaT1") %>%
  mutate(andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC))

In [None]:
# Gene x population matrix of avg_log2FC (non-cd8 datasets, without "MTRN" genes).
fcs2_mtx <- fcs2 %>%
  dplyr::filter(!grepl("cd8", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(avg_log2FC, gene, population2) %>%
  pivot_wider(names_from = "population2", values_from = "avg_log2FC") %>%
  column_to_rownames("gene") %>%
  as.matrix()

In [None]:
#' Convert adjusted p-values to significance marks.
#'
#' @param p Numeric vector of adjusted p-values.
#' @return Character vector: "*" where p < 0.05, "" otherwise. A missing
#'   p-value gives "" so that no literal "NA" is printed on the heatmap.
pval_to_stars <- function(p) {
  ifelse(!is.na(p) & p < 0.05, "*", "")
}

# Same layout as fcs2_mtx, but holding the significance marks derived
# from p_val_adj.
star_matrix <- fcs2 %>%
  dplyr::filter(!grepl("cd8", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, p_val_adj) %>%
  mutate(p_val_adj = pval_to_stars(p_val_adj)) %>%
  pivot_wider(names_from = "population2", values_from = "p_val_adj") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# Take over the row and column names of the fold-change matrix.
rownames(star_matrix) <- rownames(fcs2_mtx)
colnames(star_matrix) <- colnames(fcs2_mtx)



In [None]:
options(repr.plot.height = 6, repr.plot.width = 4)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1)
)

In [None]:
options(repr.plot.height = 6, repr.plot.width = 3.5)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  filename = "../figures/heatmaps/hm_deg_allclusters_cd4_dia0_diat1.pdf",
  width = 3.5,
  height = 7
)



### PR 0 at T0 vs PR 1 at T0

In [None]:
# Genes to display: per dataset, the top 5 (by avg_log2FC) significant
# CD4 scRNAseq markers of the "PR_0 T0 vs PR_1 T0" comparison.
hm <- all_markers %>%
  separate(
    dataset,
    into = c("cd4_cd8", "Level", NA, NA),
    sep = "_",
    remove = FALSE
  ) %>%
  dplyr::filter(
    cd4_cd8 == "cd4",
    p_val_adj < 0.05,
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster")),
    source == "scRNAseq_RNA" & test_type == "PR_0 T0 vs PR_1 T0"
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub("_", " ", dataset))) %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique()

In [None]:
# Read every selected fold-change table; seq_along() avoids the 1:0 trap
# when paths_fc is empty.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
# Stack the per-file tables into one long data frame.
fcs <- fcs %>% bind_rows()

In [None]:
# Population label: first 11 characters of the dataset name.
fcs[["population"]] <- substr(fcs[["dataset"]], 1, 11)

table(fcs$population)

# Labels containing "_nk_" are shortened to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(
      grepl("_nk_", population),
      substr(population, 1, 9),
      population
    )
  )

fcs$population2 %>% table()

# Comparison label: last 13 characters of the dataset name
# (positions n1 - 12 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )

fcs$test_type %>% table()

In [None]:
# Keep the rows whose comparison label is "T0_vs_PR_1_T0".
# NOTE(review): both ifelse() branches return avg_log2FC, so andjus_logFC
# only copies avg_log2FC (NA where p_val_adj is NA) - confirm the intent.
fcs2 <- fcs %>%
  mutate(
    andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC)
  ) %>%
  dplyr::filter(test_type == "T0_vs_PR_1_T0")

In [None]:
# Gene x population matrix of avg_log2FC, without datasets matching "cd8"
# and without genes matching "MTRN".
fcs2_mtx <- fcs2 %>%
  dplyr::filter(!grepl("cd8", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, avg_log2FC) %>%
  pivot_wider(names_from = "population2", values_from = "avg_log2FC") %>%
  column_to_rownames("gene") %>%
  as.matrix()

In [None]:
#' Convert adjusted p-values to significance marks.
#'
#' @param p Numeric vector of adjusted p-values.
#' @return Character vector: "*" where p < 0.05, "" otherwise. A missing
#'   p-value gives "" so that no literal "NA" is printed on the heatmap.
pval_to_stars <- function(p) {
  ifelse(!is.na(p) & p < 0.05, "*", "")
}

# Same layout as fcs2_mtx, but holding the significance marks derived
# from p_val_adj.
star_matrix <- fcs2 %>%
  dplyr::filter(!grepl("cd8", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, p_val_adj) %>%
  mutate(p_val_adj = pval_to_stars(p_val_adj)) %>%
  pivot_wider(names_from = "population2", values_from = "p_val_adj") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# Take over the row and column names of the fold-change matrix.
rownames(star_matrix) <- rownames(fcs2_mtx)
colnames(star_matrix) <- colnames(fcs2_mtx)



In [None]:
options(repr.plot.height = 7, repr.plot.width = 4)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1)
)

In [None]:
options(repr.plot.height = 6, repr.plot.width = 3.5)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  filename = "../figures/heatmaps/hm_deg_allclusters_cd4_pr0_pr1.pdf",
  width = 3.5,
  height = 7
)



### Keto 1 at T0 vs Keto 0 at T0

In [None]:
# Genes to display: per dataset, the top 5 (by avg_log2FC) significant
# CD4 scRNAseq markers of the "Keto_1 T0 vs Keto_0 T0" comparison.
hm <- all_markers %>%
  separate(
    dataset,
    into = c("cd4_cd8", "Level", NA, NA),
    sep = "_",
    remove = FALSE
  ) %>%
  dplyr::filter(
    cd4_cd8 == "cd4",
    p_val_adj < 0.05,
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster")),
    source == "scRNAseq_RNA" & test_type == "Keto_1 T0 vs Keto_0 T0"
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub("_", " ", dataset))) %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique()

In [None]:
# Read every selected fold-change table; seq_along() avoids the 1:0 trap
# when paths_fc is empty.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
# Stack the per-file tables into one long data frame.
fcs <- fcs %>% bind_rows()

In [None]:
# Population label: first 11 characters of the dataset name.
fcs[["population"]] <- substr(fcs[["dataset"]], 1, 11)

table(fcs$population)

# Labels containing "_nk_" are shortened to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(
      grepl("_nk_", population),
      substr(population, 1, 9),
      population
    )
  )

fcs$population2 %>% table()

# Comparison label: last 13 characters of the dataset name
# (positions n1 - 12 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )

fcs$test_type %>% table()

In [None]:
# Keep the rows whose comparison label is "_ketoacidosis".
# NOTE(review): both ifelse() branches return avg_log2FC, so andjus_logFC
# only copies avg_log2FC (NA where p_val_adj is NA) - confirm the intent.
fcs2 <- fcs %>%
  mutate(
    andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC)
  ) %>%
  dplyr::filter(test_type == "_ketoacidosis")

In [None]:
# Gene x population matrix of avg_log2FC, without datasets matching "cd8"
# and without genes matching "MTRN".
fcs2_mtx <- fcs2 %>%
  dplyr::filter(!grepl("cd8", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, avg_log2FC) %>%
  pivot_wider(names_from = "population2", values_from = "avg_log2FC") %>%
  column_to_rownames("gene") %>%
  as.matrix()

In [None]:
#' Convert adjusted p-values to significance marks.
#'
#' @param p Numeric vector of adjusted p-values.
#' @return Character vector: "*" where p < 0.05, "" otherwise. A missing
#'   p-value gives "" so that no literal "NA" is printed on the heatmap.
pval_to_stars <- function(p) {
  ifelse(!is.na(p) & p < 0.05, "*", "")
}

# Same layout as fcs2_mtx, but holding the significance marks derived
# from p_val_adj.
star_matrix <- fcs2 %>%
  dplyr::filter(!grepl("cd8", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, p_val_adj) %>%
  mutate(p_val_adj = pval_to_stars(p_val_adj)) %>%
  pivot_wider(names_from = "population2", values_from = "p_val_adj") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# Take over the row and column names of the fold-change matrix.
rownames(star_matrix) <- rownames(fcs2_mtx)
colnames(star_matrix) <- colnames(fcs2_mtx)



In [None]:
options(repr.plot.height = 7, repr.plot.width = 4)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1)
)

In [None]:
options(repr.plot.height = 6, repr.plot.width = 3.5)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  filename = "../figures/heatmaps/hm_deg_allclusters_cd4_ketoacidosis.pdf",
  width = 3.5,
  height = 7
)



## CD8 populations

### T1D T0 vs Ctrl

In [None]:
# Genes to display: per dataset, the top 5 (by avg_log2FC) significant
# CD8 scRNAseq markers of the "Dia T0 vs Ctrl T0" comparison.
hm <- all_markers %>%
  separate(
    dataset,
    into = c("cd4_cd8", "Level", NA, NA),
    sep = "_",
    remove = FALSE
  ) %>%
  dplyr::filter(
    cd4_cd8 == "cd8",
    p_val_adj < 0.05,
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster")),
    source == "scRNAseq_RNA" & test_type == "Dia T0 vs Ctrl T0"
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub("_", " ", dataset))) %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique()

In [None]:
#' Load one fold-change table and keep the rows of interest.
#'
#' Reads the globals `paths_fc` (file paths), `dataset_names` (file names in
#' the same order) and `hm` (gene symbols to keep).
#'
#' @param j Index into `paths_fc` / `dataset_names`.
#' @return A data.frame sorted by decreasing `avg_log2FC`, restricted to the
#'   cluster label chosen from the file path and to genes in `hm`, with an
#'   added `dataset` column (file name without the ".csv" extension).
get_fc_genes <- function(j) {
  print(j)
  path <- paths_fc[j]

  # The file path decides which cluster label is kept.
  cluster_pattern <- if (grepl("Ctrl", path)) {
    "Dia"
  } else if (grepl("PR_0_T0", path)) {
    "PR_0 T0"
  } else if (grepl("keto", path)) {
    "Keto_1 T0"
  } else {
    "Dia T0"
  }

  fc.df <- read_csv(path) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster_pattern, cluster)) %>%
    dplyr::filter(gene %in% hm)

  # Escape the dot and anchor the pattern so only the extension is removed;
  # rep() keeps the assignment valid when no row survived the filters.
  dataset_label <- sub("\\.csv$", "", dataset_names[j])
  fc.df$dataset <- rep(dataset_label, nrow(fc.df))
  fc.df
}

In [None]:
# Fold-change tables, listed once as full paths (for reading) and once as
# bare file names (for labelling). Both vectors go through the same filters.
paths_fc <- list.files(
  "../../240617_VN_Diabetes_V06/tables/fold_change/",
  full.names = TRUE
)
dataset_names <- list.files(
  "../../240617_VN_Diabetes_V06/tables/fold_change",
  full.names = FALSE
)

# Step 1: keep the populations of interest.
paths_fc <- grep(
  "l3|cd8_l2_unc|cd4_l2_unc|cd8_l2_nk",
  paths_fc,
  value = TRUE
)
dataset_names <- grep(
  "l3|cd8_l2_unc|cd4_l2_unc|cd8_l2_nk",
  dataset_names,
  value = TRUE
)

# Step 2: keep the comparisons of interest.
paths_fc <- grep(
  "Ctrl_vs_DiaT0|DiaT0_vs_DiaT1|Ctrl_vs_DiaT1|PR_0_T0_vs_PR_1_T0|ketoacidosis",
  paths_fc,
  value = TRUE
)
dataset_names <- grep(
  "Ctrl_vs_DiaT0|DiaT0_vs_DiaT1|Ctrl_vs_DiaT1|PR_0_T0_vs_PR_1_T0|ketoacidosis",
  dataset_names,
  value = TRUE
)


In [None]:
# Read every selected fold-change table; seq_along() avoids the 1:0 trap
# when paths_fc is empty.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
# Stack the per-file tables into one long data frame.
fcs <- fcs %>% bind_rows()

In [None]:
# Population label: first 11 characters of the dataset name.
fcs[["population"]] <- substr(fcs[["dataset"]], 1, 11)

table(fcs$population)

# Labels containing "_nk_" are shortened to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(
      grepl("_nk_", population),
      substr(population, 1, 9),
      population
    )
  )

fcs$population2 %>% table()

# Comparison label: last 13 characters of the dataset name
# (positions n1 - 12 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )

fcs$test_type %>% table()

In [None]:
# Keep the rows whose comparison label is "Ctrl_vs_DiaT0".
# NOTE(review): both ifelse() branches return avg_log2FC, so andjus_logFC
# only copies avg_log2FC (NA where p_val_adj is NA) - confirm the intent.
fcs2 <- fcs %>%
  mutate(
    andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC)
  ) %>%
  dplyr::filter(test_type == "Ctrl_vs_DiaT0")

In [None]:
# Gene x population matrix of avg_log2FC, without datasets matching "cd4"
# and without genes matching "MTRN".
fcs2_mtx <- fcs2 %>%
  dplyr::filter(!grepl("cd4", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, avg_log2FC) %>%
  pivot_wider(names_from = "population2", values_from = "avg_log2FC") %>%
  column_to_rownames("gene") %>%
  as.matrix()

In [None]:
#' Convert adjusted p-values to significance marks.
#'
#' @param p Numeric vector of adjusted p-values.
#' @return Character vector: "*" where p < 0.05, "" otherwise. A missing
#'   p-value gives "" so that no literal "NA" is printed on the heatmap.
pval_to_stars <- function(p) {
  ifelse(!is.na(p) & p < 0.05, "*", "")
}

# Same layout as fcs2_mtx, but holding the significance marks derived
# from p_val_adj.
star_matrix <- fcs2 %>%
  dplyr::filter(!grepl("cd4", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, p_val_adj) %>%
  mutate(p_val_adj = pval_to_stars(p_val_adj)) %>%
  pivot_wider(names_from = "population2", values_from = "p_val_adj") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# Take over the row and column names of the fold-change matrix.
rownames(star_matrix) <- rownames(fcs2_mtx)
colnames(star_matrix) <- colnames(fcs2_mtx)



In [None]:
options(repr.plot.height = 6, repr.plot.width = 3.5)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  filename = "../figures/heatmaps/hm_deg_allclusters_cd8_diaT0_ctrl.pdf",
  width = 3.5,
  height = 7
)

### T1D T1 vs Ctrl

In [None]:
# Genes to display: per dataset, the top 5 (by avg_log2FC) significant
# CD8 scRNAseq markers of the "Dia T1 vs Ctrl T0" comparison.
hm <- all_markers %>%
  separate(
    dataset,
    into = c("cd4_cd8", "Level", NA, NA),
    sep = "_",
    remove = FALSE
  ) %>%
  dplyr::filter(
    cd4_cd8 == "cd8",
    p_val_adj < 0.05,
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster")),
    source == "scRNAseq_RNA" & test_type == "Dia T1 vs Ctrl T0"
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub("_", " ", dataset))) %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique()

In [None]:
# Read every selected fold-change table; seq_along() avoids the 1:0 trap
# when paths_fc is empty.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
# Stack the per-file tables into one long data frame.
fcs <- fcs %>% bind_rows()

In [None]:
# Population label: first 11 characters of the dataset name.
fcs[["population"]] <- substr(fcs[["dataset"]], 1, 11)

table(fcs$population)

# Labels containing "_nk_" are shortened to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(
      grepl("_nk_", population),
      substr(population, 1, 9),
      population
    )
  )

fcs$population2 %>% table()

# Comparison label: last 13 characters of the dataset name
# (positions n1 - 12 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )

fcs$test_type %>% table()

In [None]:
# Keep the rows whose comparison label is "Ctrl_vs_DiaT1".
# NOTE(review): both ifelse() branches return avg_log2FC, so andjus_logFC
# only copies avg_log2FC (NA where p_val_adj is NA) - confirm the intent.
fcs2 <- fcs %>%
  mutate(
    andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC)
  ) %>%
  dplyr::filter(test_type == "Ctrl_vs_DiaT1")

In [None]:
# Gene x population matrix of avg_log2FC, without datasets matching "cd4"
# and without genes matching "MTRN".
fcs2_mtx <- fcs2 %>%
  dplyr::filter(!grepl("cd4", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, avg_log2FC) %>%
  pivot_wider(names_from = "population2", values_from = "avg_log2FC") %>%
  column_to_rownames("gene") %>%
  as.matrix()

In [None]:
#' Convert adjusted p-values to significance marks.
#'
#' @param p Numeric vector of adjusted p-values.
#' @return Character vector: "*" where p < 0.05, "" otherwise. A missing
#'   p-value gives "" so that no literal "NA" is printed on the heatmap.
pval_to_stars <- function(p) {
  ifelse(!is.na(p) & p < 0.05, "*", "")
}

# Same layout as fcs2_mtx, but holding the significance marks derived
# from p_val_adj.
star_matrix <- fcs2 %>%
  dplyr::filter(!grepl("cd4", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, p_val_adj) %>%
  mutate(p_val_adj = pval_to_stars(p_val_adj)) %>%
  pivot_wider(names_from = "population2", values_from = "p_val_adj") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# Take over the row and column names of the fold-change matrix.
rownames(star_matrix) <- rownames(fcs2_mtx)
colnames(star_matrix) <- colnames(fcs2_mtx)



In [None]:
# Define the colour scale here instead of relying on a `breaks` object left
# over from an earlier cell. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  # filename = "../figures/heatmaps/hm_deg_allclusters_cd8_diaT1_ctrl.pdf",
  width = 3.5,
  height = 7
)

In [None]:
options(repr.plot.height = 6, repr.plot.width = 3.5)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  filename = "../figures/heatmaps/hm_deg_allclusters_cd8_diaT1_ctrl.pdf",
  width = 3.5,
  height = 7
)



### T1D T0 vs T1D T1

In [None]:
# Genes to display: per dataset, the top 5 (by avg_log2FC) significant
# CD8 scRNAseq markers of the "Dia T0 vs Dia T1" comparison.
hm <- all_markers %>%
  separate(
    dataset,
    into = c("cd4_cd8", "Level", NA, NA),
    sep = "_",
    remove = FALSE
  ) %>%
  dplyr::filter(
    cd4_cd8 == "cd8",
    p_val_adj < 0.05,
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster")),
    source == "scRNAseq_RNA" & test_type == "Dia T0 vs Dia T1"
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub("_", " ", dataset))) %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique()

In [None]:
# Read every selected fold-change table; seq_along() avoids the 1:0 trap
# when paths_fc is empty.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
# Stack the per-file tables into one long data frame.
fcs <- fcs %>% bind_rows()

In [None]:
# Population label: first 11 characters of the dataset name.
fcs[["population"]] <- substr(fcs[["dataset"]], 1, 11)

table(fcs$population)

# Labels containing "_nk_" are shortened to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(
      grepl("_nk_", population),
      substr(population, 1, 9),
      population
    )
  )

fcs$population2 %>% table()

# Comparison label: last 13 characters of the dataset name
# (positions n1 - 12 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )

fcs$test_type %>% table()

In [None]:
# Keep the rows whose comparison label is "iaT0_vs_DiaT1".
# NOTE(review): both ifelse() branches return avg_log2FC, so andjus_logFC
# only copies avg_log2FC (NA where p_val_adj is NA) - confirm the intent.
fcs2 <- fcs %>%
  mutate(
    andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC)
  ) %>%
  dplyr::filter(test_type == "iaT0_vs_DiaT1")

In [None]:
# Gene x population matrix of avg_log2FC, without datasets matching "cd4"
# and without genes matching "MTRN".
fcs2_mtx <- fcs2 %>%
  dplyr::filter(!grepl("cd4", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, avg_log2FC) %>%
  pivot_wider(names_from = "population2", values_from = "avg_log2FC") %>%
  column_to_rownames("gene") %>%
  as.matrix()

In [None]:

# Same layout as fcs2_mtx, but holding the significance marks derived
# from p_val_adj.
star_matrix <- fcs2 %>%
  dplyr::filter(!grepl("cd4", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, p_val_adj) %>%
  mutate(p_val_adj = pval_to_stars(p_val_adj)) %>%
  pivot_wider(names_from = "population2", values_from = "p_val_adj") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# Take over the row and column names of the fold-change matrix.
rownames(star_matrix) <- rownames(fcs2_mtx)
colnames(star_matrix) <- colnames(fcs2_mtx)



In [None]:
options(repr.plot.height = 6, repr.plot.width = 4)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1)
)

In [None]:
options(repr.plot.height = 6, repr.plot.width = 3.5)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  filename = "../figures/heatmaps/hm_deg_allclusters_cd8_dia0_diat1.pdf",
  width = 3.5,
  height = 7
)



### PR 0 at T0 vs PR 1 at T0

In [None]:
# Genes to display: per dataset, the top 5 (by avg_log2FC) significant
# CD8 scRNAseq markers of the "PR_0 T0 vs PR_1 T0" comparison.
hm <- all_markers %>%
  separate(
    dataset,
    into = c("cd4_cd8", "Level", NA, NA),
    sep = "_",
    remove = FALSE
  ) %>%
  dplyr::filter(
    cd4_cd8 == "cd8",
    p_val_adj < 0.05,
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster")),
    source == "scRNAseq_RNA" & test_type == "PR_0 T0 vs PR_1 T0"
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub("_", " ", dataset))) %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique()

In [None]:
# Read every selected fold-change table; seq_along() avoids the 1:0 trap
# when paths_fc is empty.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
# Stack the per-file tables into one long data frame.
fcs <- fcs %>% bind_rows()

In [None]:
# Population label: first 11 characters of the dataset name.
fcs[["population"]] <- substr(fcs[["dataset"]], 1, 11)

table(fcs$population)

# Labels containing "_nk_" are shortened to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(
      grepl("_nk_", population),
      substr(population, 1, 9),
      population
    )
  )

fcs$population2 %>% table()

# Comparison label: last 13 characters of the dataset name
# (positions n1 - 12 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )

fcs$test_type %>% table()

In [None]:
# Keep the rows whose comparison label is "T0_vs_PR_1_T0".
# NOTE(review): both ifelse() branches return avg_log2FC, so andjus_logFC
# only copies avg_log2FC (NA where p_val_adj is NA) - confirm the intent.
fcs2 <- fcs %>%
  mutate(
    andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC)
  ) %>%
  dplyr::filter(test_type == "T0_vs_PR_1_T0")

In [None]:
# Gene x population matrix of avg_log2FC, without datasets matching "cd4"
# and without genes matching "MTRN".
fcs2_mtx <- fcs2 %>%
  dplyr::filter(!grepl("cd4", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, avg_log2FC) %>%
  pivot_wider(names_from = "population2", values_from = "avg_log2FC") %>%
  column_to_rownames("gene") %>%
  as.matrix()

In [None]:
# Same layout as fcs2_mtx, but holding the significance marks derived
# from p_val_adj.
star_matrix <- fcs2 %>%
  dplyr::filter(!grepl("cd4", dataset), !grepl("MTRN", gene)) %>%
  dplyr::select(gene, population2, p_val_adj) %>%
  mutate(p_val_adj = pval_to_stars(p_val_adj)) %>%
  pivot_wider(names_from = "population2", values_from = "p_val_adj") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# Take over the row and column names of the fold-change matrix.
rownames(star_matrix) <- rownames(fcs2_mtx)
colnames(star_matrix) <- colnames(fcs2_mtx)

In [None]:
options(repr.plot.height = 7, repr.plot.width = 4)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1)
)

In [None]:
options(repr.plot.height = 6, repr.plot.width = 3.5)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  filename = "../figures/heatmaps/hm_deg_allclusters_cd8_pr0_pr1.pdf",
  width = 3.5,
  height = 7
)



### Keto 1 at T0 vs Keto 0 at T0

In [None]:
# Genes to display: per dataset, the top 5 (by avg_log2FC) significant
# CD8 scRNAseq markers of the "Keto_1 T0 vs Keto_0 T0" comparison.
hm <- all_markers %>%
  separate(
    dataset,
    into = c("cd4_cd8", "Level", NA, NA),
    sep = "_",
    remove = FALSE
  ) %>%
  dplyr::filter(
    cd4_cd8 == "cd8",
    p_val_adj < 0.05,
    !(dataset %in% c("cd8_l2_subcluster", "cd4_l2_subcluster")),
    source == "scRNAseq_RNA" & test_type == "Keto_1 T0 vs Keto_0 T0"
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub("_", " ", dataset))) %>%
  group_by(dataset) %>%
  slice_max(order_by = avg_log2FC, n = 5) %>%
  pull(gene) %>%
  unique()

In [None]:
# Read every selected fold-change table; seq_along() avoids the 1:0 trap
# when paths_fc is empty.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
# Stack the per-file tables into one long data frame.
fcs <- fcs %>% bind_rows()

In [None]:
# Population label: first 11 characters of the dataset name.
fcs[["population"]] <- substr(fcs[["dataset"]], 1, 11)

table(fcs$population)

# Labels containing "_nk_" are shortened to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(
      grepl("_nk_", population),
      substr(population, 1, 9),
      population
    )
  )

fcs$population2 %>% table()

# Comparison label: last 13 characters of the dataset name
# (positions n1 - 12 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )

fcs$test_type %>% table()

In [None]:
# Keep the rows whose comparison label is "_ketoacidosis".
# NOTE(review): both ifelse() branches return avg_log2FC, so andjus_logFC
# only copies avg_log2FC (NA where p_val_adj is NA) - confirm the intent.
fcs2 <- fcs %>%
  mutate(
    andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC)
  ) %>%
  dplyr::filter(test_type == "_ketoacidosis")

In [None]:
# Gene x population matrix of avg_log2FC, without datasets matching "cd4"
# and without genes matching "MTRN" or "IGLV".
fcs2_mtx <- fcs2 %>%
  dplyr::filter(
    !grepl("cd4", dataset),
    !grepl("MTRN", gene),
    !grepl("IGLV", gene)
  ) %>%
  dplyr::select(gene, population2, avg_log2FC) %>%
  pivot_wider(names_from = "population2", values_from = "avg_log2FC") %>%
  column_to_rownames("gene") %>%
  as.matrix()

In [None]:
# Same layout as fcs2_mtx, but holding the significance marks derived
# from p_val_adj.
star_matrix <- fcs2 %>%
  dplyr::filter(
    !grepl("cd4", dataset),
    !grepl("MTRN", gene),
    !grepl("IGLV", gene)
  ) %>%
  dplyr::select(gene, population2, p_val_adj) %>%
  mutate(p_val_adj = pval_to_stars(p_val_adj)) %>%
  pivot_wider(names_from = "population2", values_from = "p_val_adj") %>%
  column_to_rownames("gene") %>%
  as.matrix()

# Take over the row and column names of the fold-change matrix.
rownames(star_matrix) <- rownames(fcs2_mtx)
colnames(star_matrix) <- colnames(fcs2_mtx)



In [None]:
options(repr.plot.height = 7, repr.plot.width = 4)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1)
)

In [None]:
options(repr.plot.height = 6, repr.plot.width = 3.5)
# Colour scale fixed to [-2, 2]. pheatmap expects `breaks` to be one element
# longer than `color`, hence length(breaks) - 1 colours (keeps white at 0).
breaks <- seq(-2, 2, length.out = 100)
pheatmap::pheatmap(
  fcs2_mtx,
  scale = "none",
  cluster_cols = TRUE,
  cluster_rows = TRUE,
  border_color = "white",
  display_numbers = star_matrix,
  breaks = breaks,
  color = colorRampPalette(c("blue", "white", "red"))(length(breaks) - 1),
  filename = "../figures/heatmaps/hm_deg_allclusters_cd8_ketoacidosis.pdf",
  width = 3.5,
  height = 7
)



# DotPlot - FoldChanges main genes in all comparisons

In [None]:
# Hand-picked genes for the dot plot, one thematic group per line
# (the plot's y axis uses rev(hm) as factor levels).
hm <- c(
  "LEF1", "BACH2", "NELL2", "TCF7", "CXCR4",
  "ZFP36L2", "IL7R", "KLF2", "CCR7", "SELL",
  "IFNG", "TBX21", "TNF", "CCL5", "CX3CR1", "GZMA",
  "GNLY", "CST7", "GZMB", "NKG7", "PRF1", "TNFRSF9",
  "TNFAIP3", "DUSP1", "TSC22D3", "NFKBIA", "DDIT4", "INPP4B",
  "IFIT3", "IFI6", "STAT1", "MX1", "ISG15", "IFI44L", "OASL",
  "BTN3A2", "BTN3A3"
)

In [None]:
#' Load one fold-change table and keep the rows of interest.
#'
#' Reads the globals `paths_fc` (file paths), `dataset_names` (file names in
#' the same order) and `hm` (gene symbols to keep).
#'
#' @param j Index into `paths_fc` / `dataset_names`.
#' @return A data.frame sorted by decreasing `avg_log2FC`, restricted to the
#'   cluster label chosen from the file path and to genes in `hm`, with an
#'   added `dataset` column (file name without the ".csv" extension).
get_fc_genes <- function(j) {
  print(j)
  path <- paths_fc[j]

  # The file path decides which cluster label is kept.
  cluster_pattern <- if (grepl("Ctrl", path)) {
    "Dia"
  } else if (grepl("PR_0_T0", path)) {
    "PR_0 T0"
  } else if (grepl("keto", path)) {
    "Keto_1 T0"
  } else {
    "Dia T0"
  }

  fc.df <- read_csv(path) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster_pattern, cluster)) %>%
    dplyr::filter(gene %in% hm)

  # Escape the dot and anchor the pattern so only the extension is removed;
  # rep() keeps the assignment valid when no row survived the filters.
  dataset_label <- sub("\\.csv$", "", dataset_names[j])
  fc.df$dataset <- rep(dataset_label, nrow(fc.df))
  fc.df
}

In [None]:
# Fold-change tables, listed once as full paths (for reading) and once as
# bare file names (for labelling). Both vectors go through the same filters.
paths_fc <- list.files(
  "../../240617_VN_Diabetes_V06/tables/fold_change/",
  full.names = TRUE
)
dataset_names <- list.files(
  "../../240617_VN_Diabetes_V06/tables/fold_change",
  full.names = FALSE
)

# Step 1: keep the populations of interest.
paths_fc <- grep(
  "l3|cd8_l2_unc|cd4_l2_unc|cd8_l2_nk",
  paths_fc,
  value = TRUE
)
dataset_names <- grep(
  "l3|cd8_l2_unc|cd4_l2_unc|cd8_l2_nk",
  dataset_names,
  value = TRUE
)

# Step 2: keep the comparisons of interest.
paths_fc <- grep(
  "Ctrl_vs_DiaT0|DiaT0_vs_DiaT1|Ctrl_vs_DiaT1|PR_0_T0_vs_PR_1_T0|ketoacidosis",
  paths_fc,
  value = TRUE
)
dataset_names <- grep(
  "Ctrl_vs_DiaT0|DiaT0_vs_DiaT1|Ctrl_vs_DiaT1|PR_0_T0_vs_PR_1_T0|ketoacidosis",
  dataset_names,
  value = TRUE
)


In [None]:
# Read every selected fold-change table; seq_along() avoids the 1:0 trap
# when paths_fc is empty.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
# Stack the per-file tables into one long data frame.
fcs <- fcs %>% bind_rows()

In [None]:
# Population label: first 11 characters of the dataset name.
fcs[["population"]] <- substr(fcs[["dataset"]], 1, 11)

table(fcs$population)

# Labels containing "_nk_" are shortened to their first 9 characters.
fcs <- fcs %>%
  mutate(
    population2 = ifelse(
      grepl("_nk_", population),
      substr(population, 1, 9),
      population
    )
  )

fcs$population2 %>% table()

# Comparison label: last 13 characters of the dataset name
# (positions n1 - 12 to n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )

fcs$test_type %>% table()

In [None]:
# Slim table with andjus_logFC, gene and dataset only.
# NOTE(review): both ifelse() branches return avg_log2FC, so andjus_logFC
# only copies avg_log2FC (NA where p_val_adj is NA) - confirm the intent.
fcs2 <- fcs %>%
  mutate(
    andjus_logFC = ifelse(p_val_adj < 0.05, avg_log2FC, avg_log2FC)
  ) %>%
  dplyr::select(andjus_logFC, gene, dataset)

In [None]:
options(repr.plot.height = 8, repr.plot.width = 24)
# Dot plot of the selected genes: populations on x, genes on y, one facet
# per comparison; colour = avg_log2FC, size = -log(p_val_adj).
fcs %>%
  mutate(
    population2 = factor(
      population2,
      levels = c(
        'cd8_l3_prol', 'cd8_l3_tcm_', 'cd4_l3_th1t', 'cd4_l3_temr',
        'cd8_l3_temr', 'cd4_l3_tfh_', 'cd8_l3_naiv', 'cd4_l3_naiv',
        'cd4_l3_th2_', 'cd4_l3_nfkb', 'cd4_l2_unc_', 'cd4_l3_treg',
        'cd4_l3_isag', 'cd4_l3_prol', 'cd8_l3_tem_', 'cd8_l2_nk',
        'cd8_l2_unc_'
      )
    ),
    gene = factor(gene, levels = rev(hm))
  ) %>%
  ggplot(aes(x = population2, y = gene)) +
  geom_point(aes(size = -log(p_val_adj), color = avg_log2FC)) +
  scale_alpha_manual(values = c(0.5, 1)) +
  scale_size_continuous(range = c(3, 8)) +
  ylab("") +
  xlab("") +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradientn(colours = c("blue", "blue", "white", "red", "red")) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtheme()

In [None]:
# Write the most recent plot to an SVG file (45 x 25 cm).
ggsave(
  filename = "../figures/DE_genes/heatmap_deg.svg",
  width = 45,
  height = 25,
  units = "cm",
  create.dir = TRUE
)

For the revisions, we will create the plot separately for CD4 and CD8. 

### CD8

In [None]:
options(repr.plot.height = 12, repr.plot.width = 16)
# Dot plot restricted to populations matching "cd8"; colour scale limited
# to [-2, 2].
fcs %>%
  dplyr::filter(grepl("cd8", population2)) %>%
  mutate(
    population2 = factor(
      population2,
      levels = c(
        'cd8_l3_prol', 'cd8_l3_tcm_', 'cd4_l3_th1t', 'cd4_l3_temr',
        'cd8_l3_temr', 'cd4_l3_tfh_', 'cd8_l3_naiv', 'cd4_l3_naiv',
        'cd4_l3_th2_', 'cd4_l3_nfkb', 'cd4_l2_unc_', 'cd4_l3_treg',
        'cd4_l3_isag', 'cd4_l3_prol', 'cd8_l3_tem_', 'cd8_l2_nk',
        'cd8_l2_unc_'
      )
    ),
    gene = factor(gene, levels = rev(hm))
  ) %>%
  ggplot(aes(x = population2, y = gene)) +
  geom_point(aes(size = -log(p_val_adj), color = avg_log2FC)) +
  scale_alpha_manual(values = c(0.5, 1)) +
  scale_size_continuous(range = c(3, 8)) +
  ylab("") +
  xlab("") +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradientn(
    colours = c("blue", "blue", "white", "red", "red"),
    limits = c(-2, 2)
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtheme()

In [None]:
# Write the most recent plot to an SVG file (34 x 28 cm).
ggsave(
  filename = "../figures/DE_genes/heatmap_deg_cd8.svg",
  width = 34,
  height = 28,
  units = "cm",
  create.dir = TRUE
)

### CD4

In [None]:
# Show the rows whose population label matches "cd4".
dplyr::filter(fcs, grepl("cd4", population2))

In [None]:
options(repr.plot.height = 12, repr.plot.width = 16)
# Dot plot restricted to populations matching "cd4"; colour scale limited
# to [-1.5, 1.5].
fcs %>%
  dplyr::filter(grepl("cd4", population2)) %>%
  mutate(
    population2 = factor(
      population2,
      levels = c(
        'cd8_l3_prol', 'cd8_l3_tcm_', 'cd4_l3_th1t', 'cd4_l3_temr',
        'cd8_l3_temr', 'cd4_l3_tfh_', 'cd8_l3_naiv', 'cd4_l3_naiv',
        'cd4_l3_th2_', 'cd4_l3_nfkb', 'cd4_l2_unc_', 'cd4_l3_treg',
        'cd4_l3_isag', 'cd4_l3_prol', 'cd8_l3_tem_', 'cd8_l2_nk',
        'cd8_l2_unc_'
      )
    ),
    gene = factor(gene, levels = rev(hm))
  ) %>%
  ggplot(aes(x = population2, y = gene)) +
  geom_point(aes(size = -log(p_val_adj), color = avg_log2FC)) +
  scale_alpha_manual(values = c(0.5, 1)) +
  scale_size_continuous(range = c(3, 8)) +
  ylab("") +
  xlab("") +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradientn(
    colours = c("blue", "blue", "white", "red", "red"),
    limits = c(-1.5, 1.5)
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtheme()

In [None]:
# Write the most recent plot to an SVG file (40 x 28 cm).
ggsave(
  filename = "../figures/DE_genes/heatmap_deg_cd4.svg",
  width = 40,
  height = 28,
  units = "cm",
  create.dir = TRUE
)

# Top 100 genes in datasets / comparisons

In [None]:
#' Load one fold-change table and keep the rows of one cluster label.
#'
#' Unlike the earlier definitions in this notebook, this version applies no
#' gene filter. Reads the globals `paths_fc` (file paths) and
#' `dataset_names` (file names in the same order).
#'
#' @param j Index into `paths_fc` / `dataset_names`.
#' @return A data.frame sorted by decreasing `avg_log2FC`, restricted to the
#'   cluster label chosen from the file path, with an added `dataset` column
#'   (file name without the ".csv" extension).
get_fc_genes <- function(j) {
  print(j)
  path <- paths_fc[j]

  # The file path decides which cluster label is kept.
  cluster_pattern <- if (grepl("Ctrl", path)) {
    "Dia"
  } else if (grepl("PR_0_T0", path)) {
    "PR_0 T0"
  } else if (grepl("keto", path)) {
    "Keto_1 T0"
  } else {
    "Dia T0"
  }

  fc.df <- read_csv(path) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster_pattern, cluster))

  # Escape the dot and anchor the pattern so only the extension is removed;
  # rep() keeps the assignment valid when no row survived the filter.
  dataset_label <- sub("\\.csv$", "", dataset_names[j])
  fc.df$dataset <- rep(dataset_label, nrow(fc.df))
  fc.df
}

In [None]:
# Select the fold-change tables used for the top-100 comparison. `paths_fc`
# (full paths) and `dataset_names` (file names) are filtered in parallel with
# the same patterns: first by population, then by contrast.

# TRUE for every element of `x` that matches at least one of `patterns`.
matches_any <- function(x, patterns) {
  Reduce(`|`, lapply(patterns, grepl, x = x))
}

population_patterns <- c("l3", "cd8_l2_unc", "cd4_l2_unc", "cd8_l2_nk")
contrast_patterns <- c(
  "Ctrl_vs_DiaT0", "DiaT0_vs_DiaT1", "Ctrl_vs_DiaT1",
  "PR_0_T0_vs_PR_1_T0", "ketoacidosis"
)

paths_fc <- list.files("../../240617_VN_Diabetes_V06/tables/fold_change/", full.names = TRUE)
dataset_names <- list.files("../../240617_VN_Diabetes_V06/tables/fold_change", full.names = FALSE)

paths_fc <- paths_fc[matches_any(paths_fc, population_patterns)]
dataset_names <- dataset_names[matches_any(dataset_names, population_patterns)]

paths_fc <- paths_fc[matches_any(paths_fc, contrast_patterns)]
dataset_names <- dataset_names[matches_any(dataset_names, contrast_patterns)]



In [None]:
# Load every selected fold-change table. seq_along() is used so that an empty
# `paths_fc` yields an empty list instead of iterating over c(1, 0).
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
fcs  <- bind_rows(fcs)

In [None]:
# Derive annotation columns from the dataset name:
#   population  - first 11 characters of `dataset`
#   population2 - like `population`, cut to 9 characters when it contains "_nk_"
#   n1, n2      - length of `dataset` and that length minus 12
#   test_type   - characters n2..n1 of `dataset` (its last 13 characters)
fcs <- fcs %>%
  mutate(
    population = substr(dataset, 1, 11),
    population2 = ifelse(grepl("_nk_", population), substr(population, 1, 9), population),
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )

# Sanity checks: number of rows per derived label.
table(fcs$population)

fcs$population2  %>% table

fcs$test_type  %>% table

In [None]:
# CD8 datasets only, without genes whose symbol starts with "IG" or "MTR".
fcs_cd8 <- dplyr::filter(
  fcs,
  grepl("cd8", dataset),
  !grepl("^IG", gene),
  !grepl("^MTR", gene)
)

In [None]:
# CD4 datasets only, without genes whose symbol starts with "IG" or "MTR".
fcs_cd4 <- dplyr::filter(
  fcs,
  grepl("cd4", dataset),
  !grepl("^IG", gene),
  !grepl("^MTR", gene)
)

### CD8

In [None]:
fcs_cd8

In [None]:
fcs_cd8  %>% arrange(desc(avg_log2FC))

In [None]:
# Pairwise overlap of the 100 most up-regulated genes (highest avg_log2FC)
# between CD8 populations, computed separately for every contrast.
# Result: `df_all_all`, one row per (population1, population2, test_type) with
# the number of shared genes in `overlap`.
df_all_all <- local({
  populations <- levels(factor(fcs_cd8$population2))
  contrasts <- levels(factor(fcs_cd8$test_type))

  # Top genes of every population/contrast combination, computed once.
  # head() is used instead of `[1:100]` so that lists shorter than 100 genes
  # are not padded with NA (a shared NA would be counted as an overlapping gene).
  top_genes <- lapply(setNames(populations, populations), function(pop_name) {
    lapply(setNames(contrasts, contrasts), function(contrast_name) {
      fcs_cd8 %>%
        dplyr::filter(population2 == pop_name, test_type == contrast_name) %>%
        arrange(desc(avg_log2FC)) %>%
        pull(gene) %>%
        head(100)
    })
  })

  # expand.grid() varies its first factor fastest, which gives the row order
  # population1 (slowest) > population2 > test_type (fastest).
  pairs <- expand.grid(
    test_type = contrasts,
    population2 = populations,
    population1 = populations,
    stringsAsFactors = FALSE
  )
  n_shared <- mapply(
    function(pop_a, pop_b, contrast_name) {
      length(intersect(
        top_genes[[pop_a]][[contrast_name]],
        top_genes[[pop_b]][[contrast_name]]
      ))
    },
    pairs$population1, pairs$population2, pairs$test_type,
    USE.NAMES = FALSE
  )

  data.frame(
    population1 = pairs$population1,
    population2 = pairs$population2,
    test_type = pairs$test_type,
    overlap = as.integer(n_shared)
  )
})



In [None]:
df_all_all  %>% dplyr::filter(population1 == population2)

In [None]:
breaks = seq(0, 100, length.out = 100)

In [None]:
# Create the output folder including missing parents; stay silent if it exists.
dir.create("../figures/heatmaps/overlaps_deg/", recursive = TRUE, showWarnings = FALSE)

In [None]:
# Heatmaps of the top-100 up-regulated gene overlap between CD8 populations,
# one per contrast. Each heatmap is shown inline and also written to a PDF.
# Uses the global `breaks` vector for the colour scale.
options(repr.plot.height = 6, repr.plot.width = 6.3)
up_palette <- colorRampPalette(c("white", "#e8d7d7ff", "red", "red4"))(length(breaks))

for (contrast in levels(factor(fcs_cd8$test_type))) {
  # Population x population matrix for this contrast; the diagonal
  # (a population compared with itself) is set to 0.
  hm <- df_all_all %>%
    dplyr::filter(test_type == contrast) %>%
    mutate(overlap = if_else(population1 == population2, 0, overlap)) %>%
    pivot_wider(names_from = population2, values_from = overlap) %>%
    dplyr::select(-test_type) %>%
    column_to_rownames("population1") %>%
    as.matrix()

  # Inline display.
  print(pheatmap::pheatmap(
    mat = hm, scale = "none", cluster_rows = FALSE, cluster_cols = FALSE,
    main = contrast, fontsize = 20,
    border_color = "white", breaks = breaks,
    color = up_palette
  ))

  # Same heatmap saved to disk.
  pheatmap::pheatmap(
    mat = hm, scale = "none", cluster_rows = FALSE, cluster_cols = FALSE,
    main = contrast, fontsize = 20,
    border_color = "white", breaks = breaks,
    color = up_palette,
    filename = paste0("../figures/heatmaps/overlaps_deg/up_", contrast, ".pdf"),
    width = 6.3, height = 6
  )
}

In [None]:
# Pairwise overlap of the 100 most down-regulated genes (lowest avg_log2FC)
# between CD8 populations, computed separately for every contrast.
# Result: `df_all_all_down`, one row per (population1, population2, test_type)
# with the number of shared genes in `overlap`.
df_all_all_down <- local({
  populations <- levels(factor(fcs_cd8$population2))
  contrasts <- levels(factor(fcs_cd8$test_type))

  # Bottom genes of every population/contrast combination, computed once.
  # head() is used instead of `[1:100]` so that lists shorter than 100 genes
  # are not padded with NA (a shared NA would be counted as an overlapping gene).
  bottom_genes <- lapply(setNames(populations, populations), function(pop_name) {
    lapply(setNames(contrasts, contrasts), function(contrast_name) {
      fcs_cd8 %>%
        dplyr::filter(population2 == pop_name, test_type == contrast_name) %>%
        arrange(avg_log2FC) %>%
        pull(gene) %>%
        head(100)
    })
  })

  # expand.grid() varies its first factor fastest, which gives the row order
  # population1 (slowest) > population2 > test_type (fastest).
  pairs <- expand.grid(
    test_type = contrasts,
    population2 = populations,
    population1 = populations,
    stringsAsFactors = FALSE
  )
  n_shared <- mapply(
    function(pop_a, pop_b, contrast_name) {
      length(intersect(
        bottom_genes[[pop_a]][[contrast_name]],
        bottom_genes[[pop_b]][[contrast_name]]
      ))
    },
    pairs$population1, pairs$population2, pairs$test_type,
    USE.NAMES = FALSE
  )

  data.frame(
    population1 = pairs$population1,
    population2 = pairs$population2,
    test_type = pairs$test_type,
    overlap = as.integer(n_shared)
  )
})



In [None]:
breaks = seq(0, 100, length.out = 100)

In [None]:
# Heatmaps of the top-100 down-regulated gene overlap between CD8 populations,
# one per contrast. Uses the global `breaks` vector for the colour scale.
options(repr.plot.height = 6, repr.plot.width = 6.3)
for (i in levels(factor(fcs_cd8$test_type))) {
  # Population x population matrix for this contrast; the diagonal
  # (a population compared with itself) is set to 0.
  hm <- df_all_all_down %>%
    dplyr::filter(test_type == i) %>%
    mutate(overlap = if_else(population1 == population2, 0, overlap)) %>%
    pivot_wider(names_from = population2, values_from = overlap) %>%
    dplyr::select(-test_type) %>%
    column_to_rownames("population1") %>%
    as.matrix()

  # The call saves to `filename`; the returned object is print()ed as before.
  # (The stray empty argument of the original call has been removed.)
  print(pheatmap::pheatmap(
    mat = hm, scale = "none", cluster_rows = FALSE, cluster_cols = FALSE,
    main = i, fontsize = 20,
    border_color = "white", breaks = breaks,
    color = colorRampPalette(c("white", "#d1d1e4ff", "blue", "blue4"))(length(breaks)),
    filename = paste0("../figures/heatmaps/overlaps_deg/down_", i, ".pdf"),
    width = 6.3, height = 6
  ))
}

### CD4

In [None]:
# Pairwise overlap of the 100 most up-regulated genes (highest avg_log2FC)
# between CD4 populations, computed separately for every contrast.
# Result: `df_all_all`, one row per (population1, population2, test_type) with
# the number of shared genes in `overlap`.
df_all_all <- local({
  populations <- levels(factor(fcs_cd4$population2))
  contrasts <- levels(factor(fcs_cd4$test_type))

  # Top genes of every population/contrast combination, computed once.
  # head() is used instead of `[1:100]` so that lists shorter than 100 genes
  # are not padded with NA (a shared NA would be counted as an overlapping gene).
  top_genes <- lapply(setNames(populations, populations), function(pop_name) {
    lapply(setNames(contrasts, contrasts), function(contrast_name) {
      fcs_cd4 %>%
        dplyr::filter(population2 == pop_name, test_type == contrast_name) %>%
        arrange(desc(avg_log2FC)) %>%
        pull(gene) %>%
        head(100)
    })
  })

  # expand.grid() varies its first factor fastest, which gives the row order
  # population1 (slowest) > population2 > test_type (fastest).
  pairs <- expand.grid(
    test_type = contrasts,
    population2 = populations,
    population1 = populations,
    stringsAsFactors = FALSE
  )
  n_shared <- mapply(
    function(pop_a, pop_b, contrast_name) {
      length(intersect(
        top_genes[[pop_a]][[contrast_name]],
        top_genes[[pop_b]][[contrast_name]]
      ))
    },
    pairs$population1, pairs$population2, pairs$test_type,
    USE.NAMES = FALSE
  )

  data.frame(
    population1 = pairs$population1,
    population2 = pairs$population2,
    test_type = pairs$test_type,
    overlap = as.integer(n_shared)
  )
})



In [None]:
df_all_all  %>% dplyr::filter(population1 == population2)

In [None]:
breaks = seq(0, 100, length.out = 100)

In [None]:
# Create the output folder including missing parents; stay silent if it exists.
dir.create("../figures/heatmaps/overlaps_deg/", recursive = TRUE, showWarnings = FALSE)

In [None]:
# Heatmaps of the top-100 up-regulated gene overlap between CD4 populations,
# one per contrast. Each heatmap is shown inline and also written to a PDF.
# Uses the global `breaks` vector for the colour scale.
options(repr.plot.height = 6, repr.plot.width = 6.3)
up_palette <- colorRampPalette(c("white", "#e8d7d7ff", "red", "red4"))(length(breaks))

for (contrast in levels(factor(fcs_cd4$test_type))) {
  # Population x population matrix for this contrast; the diagonal
  # (a population compared with itself) is set to 0.
  hm <- df_all_all %>%
    dplyr::filter(test_type == contrast) %>%
    mutate(overlap = if_else(population1 == population2, 0, overlap)) %>%
    pivot_wider(names_from = population2, values_from = overlap) %>%
    dplyr::select(-test_type) %>%
    column_to_rownames("population1") %>%
    as.matrix()

  # Inline display.
  print(pheatmap::pheatmap(
    mat = hm, scale = "none", cluster_rows = FALSE, cluster_cols = FALSE,
    main = contrast, fontsize = 20,
    border_color = "white", breaks = breaks,
    color = up_palette
  ))

  # Same heatmap saved to disk.
  pheatmap::pheatmap(
    mat = hm, scale = "none", cluster_rows = FALSE, cluster_cols = FALSE,
    main = contrast, fontsize = 20,
    border_color = "white", breaks = breaks,
    color = up_palette,
    filename = paste0("../figures/heatmaps/overlaps_deg/up_cd4_", contrast, ".pdf"),
    width = 6.3, height = 6
  )
}

In [None]:
# Pairwise overlap of the 100 most down-regulated genes (lowest avg_log2FC)
# between CD4 populations, computed separately for every contrast.
# Result: `df_all_all_down`, one row per (population1, population2, test_type)
# with the number of shared genes in `overlap`.
df_all_all_down <- local({
  populations <- levels(factor(fcs_cd4$population2))
  contrasts <- levels(factor(fcs_cd4$test_type))

  # Bottom genes of every population/contrast combination, computed once.
  # head() is used instead of `[1:100]` so that lists shorter than 100 genes
  # are not padded with NA (a shared NA would be counted as an overlapping gene).
  bottom_genes <- lapply(setNames(populations, populations), function(pop_name) {
    lapply(setNames(contrasts, contrasts), function(contrast_name) {
      fcs_cd4 %>%
        dplyr::filter(population2 == pop_name, test_type == contrast_name) %>%
        arrange(avg_log2FC) %>%
        pull(gene) %>%
        head(100)
    })
  })

  # expand.grid() varies its first factor fastest, which gives the row order
  # population1 (slowest) > population2 > test_type (fastest).
  pairs <- expand.grid(
    test_type = contrasts,
    population2 = populations,
    population1 = populations,
    stringsAsFactors = FALSE
  )
  n_shared <- mapply(
    function(pop_a, pop_b, contrast_name) {
      length(intersect(
        bottom_genes[[pop_a]][[contrast_name]],
        bottom_genes[[pop_b]][[contrast_name]]
      ))
    },
    pairs$population1, pairs$population2, pairs$test_type,
    USE.NAMES = FALSE
  )

  data.frame(
    population1 = pairs$population1,
    population2 = pairs$population2,
    test_type = pairs$test_type,
    overlap = as.integer(n_shared)
  )
})



In [None]:
breaks = seq(0, 100, length.out = 100)

In [None]:
# Heatmaps of the top-100 down-regulated gene overlap between CD4 populations,
# one per contrast. Uses the global `breaks` vector for the colour scale.
options(repr.plot.height = 6, repr.plot.width = 6.3)
for (i in levels(factor(fcs_cd4$test_type))) {
  # Population x population matrix for this contrast; the diagonal
  # (a population compared with itself) is set to 0.
  hm <- df_all_all_down %>%
    dplyr::filter(test_type == i) %>%
    mutate(overlap = if_else(population1 == population2, 0, overlap)) %>%
    pivot_wider(names_from = population2, values_from = overlap) %>%
    dplyr::select(-test_type) %>%
    column_to_rownames("population1") %>%
    as.matrix()

  # The call saves to `filename`; the returned object is print()ed as before.
  # (The stray empty argument of the original call has been removed.)
  print(pheatmap::pheatmap(
    mat = hm, scale = "none", cluster_rows = FALSE, cluster_cols = FALSE,
    main = i, fontsize = 20,
    border_color = "white", breaks = breaks,
    color = colorRampPalette(c("white", "#d1d1e4ff", "blue", "blue4"))(length(breaks)),
    filename = paste0("../figures/heatmaps/overlaps_deg/down_cd4_", i, ".pdf"),
    width = 6.3, height = 6
  ))
}

# GSEA

In this part, we will be focusing on GSEA analyses calculated from the fold changes between T1D patients and healthy donors. First, we will calculate the log fold changes for each dataset and for the following contrasts:

* T1D T0 vs Healthy
* T1D T1 vs Healthy
* T1D T0 vs T1D T1
* T1D Partial remission no vs T1D Partial remission yes in T0
* T1D Partial remission no vs T1D Partial remission yes in T1
* T1D Ketoacidosis no vs T1D Ketoacidosis yes

Note that the first condition represents the worse state of (or lack of) disease so positive logFC is generally associated with disease, while negative logFC is generally associated with health.

## Fold Changes in Our Dataset

We will load all the datasets in which we will calculate logFCs. Please note that this takes a very long time to run, so if you want to save time, download the GSEA tables from Zenodo and load them instead.

In [None]:
# Load the processed objects of every CD8 and CD4 population (L1-L3).
# `cd8_l2_subcluster` was previously read a second time at the end of this
# cell; the redundant read of the same file has been removed.

# CD8 datasets
cd8_l3_tem  <- readRDS("../data/processed/L3/cd8_l3_tem.rds")
cd8_l3_naive  <- readRDS("../data/processed/L3/cd8_l3_naive.rds")
cd8_l3_tcm  <- readRDS("../data/processed/L3/cd8_l3_tcm.rds")
cd8_l3_temra  <- readRDS("../data/processed/L3/cd8_l3_temra.rds")
cd8_l3_prolif  <- readRDS("../data/processed/L3/cd8_l3_prolif.rds")
cd8_l2_subcluster  <- readRDS("../data/processed/L2/cd8_l2_subcluster.rds")
cd8_l2_nk  <- readRDS("../data/processed/L2/cd8_l2_nk.rds")
cd8_l2_unc  <- readRDS("../data/processed/L2/cd8_l2_unc.rds")
cd8_l1_full_filt  <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")

# CD4 datasets
cd4_l3_naive  <- readRDS("../data/processed/L3/cd4_l3_naive.rds")
cd4_l3_tfh  <- readRDS("../data/processed/L3/cd4_l3_tfh.rds")
cd4_l3_th1th17  <- readRDS("../data/processed/L3/cd4_l3_th1_17.rds")
cd4_l3_nfkb  <- readRDS("../data/processed/L3/cd4_l3_nfkb.rds")
cd4_l3_th2  <- readRDS("../data/processed/L3/cd4_l3_th2.rds")
cd4_l3_treg  <- readRDS("../data/processed/L3/cd4_l3_treg.rds")
cd4_l3_isaghi  <- readRDS("../data/processed/L3/cd4_l3_isaghi.rds")
cd4_l3_proliferating  <- readRDS("../data/processed/L3/cd4_l3_proliferating.rds")
cd4_l3_temra  <- readRDS("../data/processed/L3/cd4_l3_temra.rds")
cd4_l2_unc  <- readRDS("../data/processed/L2/cd4_l2_unc.rds")
# NOTE(review): this file name lacks the "_l2_" part used by the CD8
# counterpart - confirm that the path is correct.
cd4_l2_subcluster  <- readRDS("../data/processed/L2/cd4_subcluster.rds")
cd4_l1_full_filt  <- readRDS("../data/processed/L1/cd4_l1_full_filt.rds")

We will create a list of these datasets. 

In [None]:
# Named list of all datasets; each element is named after its variable.
dataset_list <- list(
  cd8_l3_tem = cd8_l3_tem,
  cd8_l3_naive = cd8_l3_naive,
  cd8_l3_tcm = cd8_l3_tcm,
  cd8_l3_temra = cd8_l3_temra,
  cd8_l3_prolif = cd8_l3_prolif,
  cd8_l2_nk = cd8_l2_nk,
  cd8_l2_unc = cd8_l2_unc,
  cd8_l1_full_filt = cd8_l1_full_filt,
  cd4_l3_naive = cd4_l3_naive,
  cd4_l3_tfh = cd4_l3_tfh,
  cd4_l3_th1th17 = cd4_l3_th1th17,
  cd4_l3_nfkb = cd4_l3_nfkb,
  cd4_l3_th2 = cd4_l3_th2,
  cd4_l3_treg = cd4_l3_treg,
  cd4_l3_isaghi = cd4_l3_isaghi,
  cd4_l3_proliferating = cd4_l3_proliferating,
  cd4_l3_temra = cd4_l3_temra,
  cd4_l2_unc = cd4_l2_unc,
  cd4_l2_subcluster = cd4_l2_subcluster,
  cd4_l1_full_filt = cd4_l1_full_filt,
  cd8_l2_subcluster = cd8_l2_subcluster
)

In [None]:
options(future.globals.maxSize = 2000 * 1024^2)

In [None]:
# Create the output folder including missing parents; stay silent if it exists.
dir.create("../tables/fold_change/", recursive = TRUE, showWarnings = FALSE)

First, let's calculate DEG between T1D (both timepoints) and healthy. 

In [None]:
#plan("multisession", workers = 4)

# For every dataset, compare the groups stored in `Disease` and write the full
# marker table to ../tables/fold_change/<dataset name>.csv.
for (i in seq_along(dataset_list)) {
  seurat_object <- dataset_list[[i]]
  DefaultAssay(seurat_object) <- "RNA"
  Idents(seurat_object) <- seurat_object$Disease

  # All filters are disabled (-Inf thresholds, return.thresh = 1) so that the
  # table is not restricted by fold change, detection rate or p-value.
  fc.seurat_object <- FindAllMarkers(
    seurat_object,
    test.use = "wilcox",
    logfc.threshold = -Inf, min.pct = -Inf, min.diff.pct = -Inf,
    only.pos = FALSE,
    min.cells.feature = 1, return.thresh = 1
  )

  write.csv(fc.seurat_object, paste0("../tables/fold_change/", names(dataset_list)[i], ".csv"))
}

Then calculate all DEG in all contrasts. 

In [None]:
#plan("multisession", workers = 8)

# Run an unfiltered Wilcoxon FindAllMarkers on `seurat_object`, using the
# metadata column `ident_column` as identities, and write the table to
# `out_file`.
#   seurat_object - Seurat object already subset to the groups to compare
#   ident_column  - name of the metadata column holding the group labels
#   out_file      - path of the CSV file to write
write_all_markers <- function(seurat_object, ident_column, out_file) {
  DefaultAssay(seurat_object) <- "RNA"
  Idents(seurat_object) <- seurat_object[[ident_column, drop = TRUE]]

  # All filters are disabled (-Inf thresholds, return.thresh = 1) so that the
  # table is not restricted by fold change, detection rate or p-value.
  fc_table <- FindAllMarkers(
    seurat_object,
    test.use = "wilcox",
    logfc.threshold = -Inf, min.pct = -Inf, min.diff.pct = -Inf,
    only.pos = FALSE,
    min.cells.feature = 1, return.thresh = 1
  )
  write.csv(fc_table, out_file)
  invisible(fc_table)
}

for (i in seq_along(dataset_list)) {
  dataset <- dataset_list[[i]]
  out_prefix <- paste0("../tables/fold_change/", names(dataset_list)[i])

  ###### Ctrl T0 vs Dia T0
  write_all_markers(
    subset(dataset, Condition %in% c("Ctrl T0", "Dia T0")),
    "Condition", paste0(out_prefix, "_Ctrl_vs_DiaT0.csv")
  )

  ####### Ctrl T0 vs Dia T1
  write_all_markers(
    subset(dataset, Condition %in% c("Ctrl T0", "Dia T1")),
    "Condition", paste0(out_prefix, "_Ctrl_vs_DiaT1.csv")
  )

  ####### Dia T0 vs Dia T1
  write_all_markers(
    subset(dataset, Condition %in% c("Dia T0", "Dia T1")),
    "Condition", paste0(out_prefix, "_DiaT0_vs_DiaT1.csv")
  )

  ####### Part remission
  write_all_markers(
    subset(dataset, Condition2 %in% c("Part_remission_0", "Part_remission_1")),
    "Condition2", paste0(out_prefix, "_remission.csv")
  )
}

Add metadata to calculate remission and ketoacidosis. 

In [None]:
# Per-patient partial-remission label, taken from the "Dia T0" cells of the
# CD4 L1 dataset: one row per (Patient_ID, part_remission_y_n) combination,
# with Condition2 = "PR_<part_remission_y_n>". The count column `n` and the
# original remission column are dropped.
md_for_remission  <- cd4_l1_full_filt@meta.data  %>% 
dplyr::filter(Condition %in% c("Dia T0"))  %>% 
group_by(Patient_ID, part_remission_y_n)  %>% 
tally %>% mutate(Condition2 = paste0("PR_",part_remission_y_n))  %>% 
dplyr::select(-n, -part_remission_y_n)

# Replace Condition2 in every dataset with "<PR label> <Time>".
for( i in 1:length(dataset_list)) {
    
    # Drop the existing Condition2 so that the join below does not clash with it.
    dataset_list[[i]]$Condition2  <- NULL
    # No `by` is given, so the join uses all columns shared by both tables
    # (presumably only Patient_ID - verify). Labels containing "NA" (missing
    # PR label or Time after paste()) are set to NA.
    dataset_list[[i]]@meta.data  <- dataset_list[[i]]@meta.data  %>% 
    left_join(md_for_remission)  %>% mutate(Condition2 = paste(Condition2, Time))  %>% 
    mutate(Condition2 = ifelse(grepl(Condition2, pattern = "NA"), NA_character_,Condition2))
    
    # Sanity check: number of cells per new label.
    print(dataset_list[[i]]$Condition2  %>% table)
    # Re-attach the cell names as row names of the metadata.
    # NOTE(review): this assumes the join kept the row count and order (one
    # row per Patient_ID in md_for_remission) - confirm.
    rownames(dataset_list[[i]]@meta.data)  <- colnames(dataset_list[[i]])
}

In [None]:
# Per-patient ketoacidosis label, taken from the "Dia T0" cells of the CD4 L1
# dataset: "Keto_1" when ph_man < 7.3, otherwise "Keto_0". The count column
# `n` and `ph_man` are dropped.
md_for_ketoacidosis  <- cd4_l1_full_filt@meta.data  %>% 
dplyr::filter(Condition %in% c("Dia T0"))  %>% 
group_by(Patient_ID, ph_man)  %>% 
tally %>% mutate(Ketoacidosis = ifelse(ph_man<7.3,"Keto_1","Keto_0"))  %>% 
dplyr::select(-ph_man, -n)

# Add Ketoacidosis and Keto_Time ("<Keto label> <Time>") to every dataset.
for( i in 1:length(dataset_list)) {
    
    # NOTE(review): with this line disabled, re-running the cell joins onto an
    # existing Ketoacidosis column - confirm that the cell is run only once.
    #dataset_list[[i]]$Ketoacidosis  <- NULL
    # No `by` is given, so the join uses all columns shared by both tables
    # (presumably only Patient_ID - verify). Labels containing "NA" are set
    # to NA.
    dataset_list[[i]]@meta.data  <- dataset_list[[i]]@meta.data  %>% 
    left_join(md_for_ketoacidosis)   %>% mutate(Keto_Time = paste(Ketoacidosis, Time))  %>% 
    mutate(Keto_Time = ifelse(grepl(Keto_Time, pattern = "NA"), NA_character_,Keto_Time))
    
    # Sanity check: number of cells per ketoacidosis label.
    print(dataset_list[[i]]$Ketoacidosis  %>% table)
    # Re-attach the cell names as row names of the metadata.
    # NOTE(review): this assumes the join kept the row count and order - confirm.
    rownames(dataset_list[[i]]@meta.data)  <- colnames(dataset_list[[i]])
}

In [None]:
plan("multisession", workers = 4)

# Run an unfiltered Wilcoxon FindAllMarkers on `seurat_object`, using the
# metadata column `ident_column` as identities, and write the table to
# `out_file`.
#   seurat_object - Seurat object already subset to the groups to compare
#   ident_column  - name of the metadata column holding the group labels
#   out_file      - path of the CSV file to write
write_all_markers <- function(seurat_object, ident_column, out_file) {
  DefaultAssay(seurat_object) <- "RNA"
  Idents(seurat_object) <- seurat_object[[ident_column, drop = TRUE]]

  # All filters are disabled (-Inf thresholds, return.thresh = 1) so that the
  # table is not restricted by fold change, detection rate or p-value.
  fc_table <- FindAllMarkers(
    seurat_object,
    test.use = "wilcox",
    logfc.threshold = -Inf, min.pct = -Inf, min.diff.pct = -Inf,
    only.pos = FALSE,
    min.cells.feature = 1, return.thresh = 1
  )
  write.csv(fc_table, out_file)
  invisible(fc_table)
}

for (i in seq_along(dataset_list)) {
  dataset <- dataset_list[[i]]
  out_prefix <- paste0("../tables/fold_change/", names(dataset_list)[i])

  ###### Keto_Time: ketoacidosis yes vs no at T0
  write_all_markers(
    subset(dataset, Keto_Time %in% c("Keto_1 T0", "Keto_0 T0")),
    "Keto_Time", paste0(out_prefix, "_ketoacidosis.csv")
  )

  ####### PR0 T0 vs PR0 T1
  write_all_markers(
    subset(dataset, Condition2 %in% c("PR_0 T0", "PR_0 T1")),
    "Condition2", paste0(out_prefix, "_PR_0_T0_vs_PR_0_T1.csv")
  )

  ####### PR1 T0 vs PR1 T1
  write_all_markers(
    subset(dataset, Condition2 %in% c("PR_1 T0", "PR_1 T1")),
    "Condition2", paste0(out_prefix, "_PR_1_T0_vs_PR_1_T1.csv")
  )

  ####### PR0 T0 vs PR1 T0
  write_all_markers(
    subset(dataset, Condition2 %in% c("PR_0 T0", "PR_1 T0")),
    "Condition2", paste0(out_prefix, "_PR_0_T0_vs_PR_1_T0.csv")
  )

  ####### PR0 T1 vs PR1 T1
  write_all_markers(
    subset(dataset, Condition2 %in% c("PR_0 T1", "PR_1 T1")),
    "Condition2", paste0(out_prefix, "_PR_0_T1_vs_PR_1_T1.csv")
  )
}

## Test GSEA all pathways

From the calculated fold changes, we will perform GSEA analysis using fgsea. 

In [None]:
m_df <- msigdbr(species = "Homo sapiens", category = "C7")


In [None]:
fgsea_sets <- m_df %>% split(x = .$gene_symbol, f = .$gs_name)

Let's read the precalculated fold changes. 

In [None]:
paths_fc  <- list.files("../../240218_VN_Diabetes_V05/tables/fold_change", full.names = T)
dataset_names  <- list.files("../../240218_VN_Diabetes_V05/tables/fold_change", full.names = F)


In [None]:
paths_fc

In [None]:
paths_fc  <- paths_fc[which(grepl(paths_fc, pattern = "Ctrl")|grepl(paths_fc, pattern = "DiaT0_vs_DiaT1"))]

In [None]:
dataset_names  <- dataset_names[which(grepl(dataset_names, pattern = "Ctrl")|grepl(dataset_names, pattern = "DiaT0_vs_DiaT1"))]

## C7 pathways

In [None]:
# Run fgsea for the j-th fold-change table against the global `fgsea_sets`.
#
# Genes are ranked by avg_log2FC of the disease-side cluster: clusters
# matching "Dia" when the file path contains "Ctrl", otherwise clusters
# matching "Dia T0". Relies on the globals `paths_fc`, `dataset_names` and
# `fgsea_sets`.
#
# Returns the fgsea result with an extra `dataset` column (file name without
# the ".csv" pattern).
gsea_all_pathways <- function(j) {
  print(j)
  fc_path <- paths_fc[j]
  cluster_pattern <- if (grepl("Ctrl", fc_path)) "Dia" else "Dia T0"

  # Named vector: names = genes, values = avg_log2FC, sorted decreasingly.
  ranks <- read_csv(fc_path) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster_pattern, cluster)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    deframe()

  fgseaRes <- fgsea(pathways = fgsea_sets, stats = ranks)
  fgseaRes$dataset <- gsub(".csv", "", dataset_names[j])
  return(fgseaRes)
}

In [None]:
# Run GSEA for every selected fold-change table. seq_along() is used so that
# an empty `paths_fc` yields an empty list instead of iterating over c(1, 0).
gsea <- map(.x = seq_along(paths_fc), .f = gsea_all_pathways)

In [None]:
gseas  <- bind_rows(gsea)

In [None]:
# Create the output folder including missing parents; stay silent if it exists.
dir.create("../tables/gsea", recursive = TRUE, showWarnings = FALSE)

In [None]:
gseas_df  <- gseas  %>% as.data.frame

In [None]:
gseas_df  <- gseas_df  %>% mutate(
    leadingEdge2 = paste(leadingEdge))

In [None]:
gseas_df$leadingEdge  <- NULL

In [None]:
write.csv(gseas_df, "../tables/gsea/gsea_fold_changes_for_heatmap.csv")

In [None]:
gseas_df  <- read_csv("../../240218_VN_Diabetes_V05/tables/gsea/gsea_fold_changes_for_heatmap.csv")

## Hallmark pathways

In [None]:
# Hallmark ("H") gene sets from MSigDB as a named list:
# gene-set name -> vector of gene symbols.
m_df <- msigdbr(species = "Homo sapiens", category = "H")

fgsea_sets <- split(x = m_df$gene_symbol, f = m_df$gs_name)

In [None]:
names(fgsea_sets)

In [None]:
# Run fgsea for the j-th fold-change table against the global `fgsea_sets`.
#
# Genes are ranked by avg_log2FC of the disease-side cluster: clusters
# matching "Dia" when the file path contains "Ctrl", otherwise clusters
# matching "Dia T0". Relies on the globals `paths_fc`, `dataset_names` and
# `fgsea_sets`.
#
# Returns the fgsea result with an extra `dataset` column (file name without
# the ".csv" pattern).
gsea_all_pathways <- function(j) {
  print(j)
  fc_path <- paths_fc[j]
  cluster_pattern <- if (grepl("Ctrl", fc_path)) "Dia" else "Dia T0"

  # Named vector: names = genes, values = avg_log2FC, sorted decreasingly.
  ranks <- read_csv(fc_path) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster_pattern, cluster)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    deframe()

  fgseaRes <- fgsea(pathways = fgsea_sets, stats = ranks)
  fgseaRes$dataset <- gsub(".csv", "", dataset_names[j])
  return(fgseaRes)
}

In [None]:
# Run GSEA for every selected fold-change table. seq_along() is used so that
# an empty `paths_fc` yields an empty list instead of iterating over c(1, 0).
gsea <- map(.x = seq_along(paths_fc), .f = gsea_all_pathways)

In [None]:
gseas  <- bind_rows(gsea)

In [None]:
gseas  %>% arrange(padj)

In [None]:
gseas_df  <- gseas  %>% as.data.frame

In [None]:
gseas_df  <- gseas_df  %>% mutate(
    leadingEdge2 = paste(leadingEdge))

In [None]:
gseas_df$leadingEdge  <- NULL

In [None]:
write.csv(gseas_df, "../tables/gsea/gsea_fold_changes_for_heatmap_hallmark.csv")

# Plot GSEA figure

Here, we will prepare the GSEA plots for the figure. 

In [None]:
# C2 gene sets from MSigDB as a named list:
# gene-set name -> vector of gene symbols.
m_df <- msigdbr(species = "Homo sapiens", category = "C2")

fgsea_sets_hs_c2 <- split(x = m_df$gene_symbol, f = m_df$gs_name)

In [None]:
# C7 gene sets from MSigDB as a named list:
# gene-set name -> vector of gene symbols.
m_df <- msigdbr(species = "Homo sapiens", category = "C7")

fgsea_sets_hs_c7 <- split(x = m_df$gene_symbol, f = m_df$gs_name)

## Ranks T0

First, let's use the fold changes from the contrast Dia T0 vs healthy. 

In [None]:
# Fold-change tables of the "Ctrl vs Dia T0" contrast: full paths and the
# matching file names.
paths_fc <- grep(
  "_Ctrl_vs_DiaT0",
  list.files("../../240218_VN_Diabetes_V05/tables/fold_change", full.names = TRUE),
  value = TRUE
)
dataset_names <- grep(
  "_Ctrl_vs_DiaT0",
  list.files("../../240218_VN_Diabetes_V05/tables/fold_change", full.names = FALSE),
  value = TRUE
)

dataset_names

paths_fc

In [None]:
# Ranked gene vector (names = genes, values = avg_log2FC, decreasing) of the
# "Dia T0" cluster in the first "Ctrl vs Dia T0" table.
# NOTE(review): index 1 is presumably the CD4 table - confirm against `paths_fc`.
fc.df <- read_csv(paths_fc[1]) %>%
  as.data.frame() %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::filter(cluster == "Dia T0") %>%
  dplyr::select(gene, avg_log2FC)
ranks_cd4_t0 <- setNames(fc.df[["avg_log2FC"]], fc.df[["gene"]])

In [None]:
# Ranked gene vector (names = genes, values = avg_log2FC, decreasing) of the
# "Dia T0" cluster in the 13th "Ctrl vs Dia T0" table.
# NOTE(review): index 13 is presumably the CD8 table - confirm against `paths_fc`.
fc.df <- read_csv(paths_fc[13]) %>%
  as.data.frame() %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::filter(cluster == "Dia T0") %>%
  dplyr::select(gene, avg_log2FC)
ranks_cd8_t0 <- setNames(fc.df[["avg_log2FC"]], fc.df[["gene"]])

In [None]:
ranks_cd8_t0

## Ranks T1

Next, let's use the fold changes from the contrast Dia T1 vs healthy. 

In [None]:
# Fold-change tables of the "Ctrl vs Dia T1" contrast: full paths and the
# matching file names.
paths_fc2 <- grep(
  "_Ctrl_vs_DiaT1",
  list.files("../../240218_VN_Diabetes_V05/tables/fold_change", full.names = TRUE),
  value = TRUE
)
dataset_names2 <- grep(
  "_Ctrl_vs_DiaT1",
  list.files("../../240218_VN_Diabetes_V05/tables/fold_change", full.names = FALSE),
  value = TRUE
)

In [None]:
paths_fc2
dataset_names2


In [None]:
# Ranked gene vector (names = genes, values = avg_log2FC, decreasing) of the
# "Dia T1" cluster in the first "Ctrl vs Dia T1" table.
# NOTE(review): index 1 is presumably the CD4 table - confirm against `paths_fc2`.
fc.df <- read_csv(paths_fc2[1]) %>%
  as.data.frame() %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::filter(cluster == "Dia T1") %>%
  dplyr::select(gene, avg_log2FC)
ranks_cd4_t1 <- setNames(fc.df[["avg_log2FC"]], fc.df[["gene"]])



In [None]:
# Ranked gene vector (names = genes, values = avg_log2FC, decreasing) of the
# "Dia T1" cluster in the 13th "Ctrl vs Dia T1" table.
# NOTE(review): index 13 is presumably the CD8 table - confirm against `paths_fc2`.
fc.df <- read_csv(paths_fc2[13]) %>%
  as.data.frame() %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::filter(cluster == "Dia T1") %>%
  dplyr::select(gene, avg_log2FC)
ranks_cd8_t1 <- setNames(fc.df[["avg_log2FC"]], fc.df[["gene"]])

In [None]:
paths_fc2[13]

In [None]:
ranks_cd8_t1

We will plot any pathway in the different contrasts:
* CD8 Dia T0 vs Ctrl.
* CD8 Dia T1 vs Ctrl.
* CD4 Dia T0 vs Ctrl.
* CD4 Dia T1 vs Ctrl.

In [None]:
# Plot the enrichment curve of one gene set in four contrasts (CD8/CD4,
# Dia T0/T1 vs Ctrl), combine the panels and save them as an SVG.
#
#   gsea      - name of the gene set (must be a name of `fgsea_set`)
#   fgsea_set - named list of gene sets (gene-set name -> gene symbols)
#
# Uses the global rank vectors `ranks_cd8_t0`, `ranks_cd8_t1`, `ranks_cd4_t0`
# and `ranks_cd4_t1`; the panels are combined with `+` and plot_annotation(),
# so patchwork must be attached before the function is called.
# Writes ../figures/gsea/<gsea>.svg and returns the combined plot.
plot_four_gsea_condition <- function(gsea, fgsea_set) {
  if (!gsea %in% names(fgsea_set)) {
    stop("Gene set '", gsea, "' not found in `fgsea_set`.", call. = FALSE)
  }
  gene_set <- fgsea_set[[gsea]]

  # One enrichment panel with the shared y range and theme.
  enrichment_panel <- function(ranks, title) {
    plotEnrichment(gene_set, ranks) +
      labs(title = title) +
      ylim(-0.8, 0.8) +
      ggtheme()
  }

  p <- enrichment_panel(ranks_cd8_t0, "CD8 Dia T0 vs Ctrl.") +
    enrichment_panel(ranks_cd8_t1, "CD8 Dia T1 vs Ctrl.") +
    enrichment_panel(ranks_cd4_t0, "CD4 Dia T0 vs Ctrl.") +
    enrichment_panel(ranks_cd4_t1, "CD4 Dia T1 vs Ctrl.") +
    plot_annotation(title = gsea)

  # create.dir = TRUE (as in the other ggsave calls of this notebook) so that
  # saving does not fail when ../figures/gsea/ does not exist yet.
  ggsave(
    p,
    filename = paste0("../figures/gsea/", gsea, ".svg"),
    width = 24, height = 24, units = "cm", create.dir = TRUE
  )
  p
}

In [None]:
library(patchwork)

In [None]:
# Shared ggplot theme additions: size-20 text everywhere and a 10-point legend
# key. Returns a theme object to be added to a plot with `+`.
ggtheme <- function() {
  font_size <- 20
  theme(
    axis.text = element_text(size = font_size),
    axis.title = element_text(size = font_size),
    text = element_text(size = font_size, colour = "black"),
    legend.text = element_text(size = font_size),
    legend.key.size = unit(10, units = "points")
  )
}

In [None]:
plot_four_gsea_condition("GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP", fgsea_sets_hs_c7)

In [None]:
plot_four_gsea_condition("GSE9650_NAIVE_VS_EFF_CD8_TCELL_UP", fgsea_sets_hs_c7)

In [None]:
plot_four_gsea_condition("GSE9650_NAIVE_VS_EFF_CD8_TCELL_DN", fgsea_sets_hs_c7)

In [None]:
plot_four_gsea_condition("GSE22886_NAIVE_CD8_TCELL_VS_NKCELL_UP", fgsea_sets_hs_c7)

In [None]:
plot_four_gsea_condition("GSE22886_NAIVE_CD8_TCELL_VS_NKCELL_DN", fgsea_sets_hs_c7)

In [None]:
plot_four_gsea_condition("GSE22886_NAIVE_CD4_TCELL_VS_NKCELL_DN", fgsea_sets_hs_c7)

In [None]:
plot_four_gsea_condition("GSE22886_NAIVE_CD4_TCELL_VS_NKCELL_UP", fgsea_sets_hs_c7)

# GSEA in KetoAcidosis and Remission

We will use the same approach to plot GSEA in ketoacidosis and PR. 

In [None]:
library(msigdbr)

In [None]:
m_df<- msigdbr(species = "Homo sapiens", category = "C7")


In [None]:
fgsea_sets<- m_df %>% split(x = .$gene_symbol, f = .$gs_name)

In [None]:
# Fold-change tables of the V05 analysis: full paths and the matching bare
# file names (same order, one entry per file).
paths_fc <- list.files(
  path = "../../240218_VN_Diabetes_V05/tables/fold_change",
  full.names = TRUE
)
dataset_names <- basename(paths_fc)


In [None]:
# Keep only the ketoacidosis and the PR_0_T0-vs-PR_1_T0 fold-change tables.
paths_fc <- paths_fc[grepl("ketoacidosis|PR_0_T0_vs_PR_1_T0", paths_fc)]

In [None]:
# Same selection as for the paths, applied to the bare file names.
dataset_names <- dataset_names[grepl("ketoacidosis|PR_0_T0_vs_PR_1_T0", dataset_names)]

In [None]:
fc.df  <- read_csv(paths_fc[1])  %>% as.data.frame()

In [None]:
fc.df

In [None]:
# Ranked statistic for fgsea: rows of the "Keto_1 T0" cluster, sorted by
# decreasing avg_log2FC, turned into a named vector (gene -> avg_log2FC).
fc.df <- fc.df %>%
  dplyr::filter(cluster == "Keto_1 T0") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)
ranks_cd4_keto <- fc.df %>% deframe()

In [None]:
fc.df  <- read_csv(paths_fc[2])  %>% as.data.frame()

In [None]:
fc.df

In [None]:
# Ranked statistic for fgsea: rows of the "PR_0 T0" cluster, sorted by
# decreasing avg_log2FC, turned into a named vector (gene -> avg_log2FC).
fc.df <- fc.df %>%
  dplyr::filter(cluster == "PR_0 T0") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)
ranks_cd4_pr <- fc.df %>% deframe()

In [None]:
ranks_cd4_pr

CD8 populations

In [None]:
paths_fc[25]

In [None]:
# CD8 ketoacidosis ranks (gene -> avg_log2FC, "Keto_1 T0" rows only).
# NOTE(review): 25 is a hard-coded position in the filtered `paths_fc`;
# presumably the CD8 ketoacidosis table -- confirm against the path printed
# in the cell above before relying on it.
fc.df <- as.data.frame(read_csv(paths_fc[25]))
fc.df <- fc.df %>%
  dplyr::filter(cluster == "Keto_1 T0") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)
ranks_cd8_keto <- fc.df %>% deframe()

In [None]:
paths_fc[26]

In [None]:
# CD8 partial-remission ranks (gene -> avg_log2FC, "PR_0 T0" rows only).
# NOTE(review): 26 is a hard-coded position in the filtered `paths_fc`;
# presumably the CD8 PR_0_T0-vs-PR_1_T0 table -- confirm against the path
# printed in the cell above before relying on it.
fc.df <- as.data.frame(read_csv(paths_fc[26]))
fc.df <- fc.df %>%
  dplyr::filter(cluster == "PR_0 T0") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)
ranks_cd8_pr <- fc.df %>% deframe()

In [None]:
library(patchwork)

In [None]:
# Plot the fgsea enrichment curve of one gene set for the four T0 contrasts
# (CD4/CD8 x ketoacidosis/partial remission), combine the panels with
# patchwork and save the figure as SVG.
#
# gsea        Name of the gene set; must be an element name of `fgsea_set`.
# fgsea_set   Named list of gene-symbol vectors.
# file_suffix Text appended to the gene-set name in the output file name.
#             The Dia-vs-Ctrl version of this function defined earlier in the
#             notebook writes "../figures/gsea/<gsea>.svg"; the suffix keeps
#             this version from overwriting those figures.
#
# Uses the global rank vectors ranks_cd4_keto, ranks_cd4_pr, ranks_cd8_keto
# and ranks_cd8_pr. Returns the combined plot.
plot_four_gsea_condition <- function(gsea, fgsea_set, file_suffix = "_keto_pr") {
  gene_set <- fgsea_set[[gsea]]
  if (is.null(gene_set)) {
    stop("Gene set '", gsea, "' not found in `fgsea_set`.", call. = FALSE)
  }

  # One enrichment panel with the shared y range and theme.
  enrichment_panel <- function(ranks, panel_title) {
    plotEnrichment(gene_set, ranks) +
      labs(title = panel_title) +
      ylim(-0.8, 0.8) +
      ggtheme()
  }

  p <- enrichment_panel(ranks_cd4_keto, "CD4 Keto yes vs no at T0") +
    enrichment_panel(ranks_cd4_pr, "CD4 PR no vs yes at T0") +
    enrichment_panel(ranks_cd8_keto, "CD8 Keto yes vs no at T0") +
    enrichment_panel(ranks_cd8_pr, "CD8 PR no vs yes at T0") +
    plot_annotation(title = gsea)

  ggsave(
    p,
    filename = paste0("../figures/gsea/", gsea, file_suffix, ".svg"),
    width = 24, height = 24, units = "cm",
    create.dir = TRUE
  )
  return(p)
}

In [None]:
m_df<- msigdbr(species = "Homo sapiens", category = "C2")
fgsea_sets_hs_c2 <- m_df %>% split(x = .$gene_symbol, f = .$gs_name)

In [None]:
fgsea_sets_hs_c2[["REACTOME_INTERFERON_ALPHA_BETA_SIGNALING"]]

In [None]:
m_df<- msigdbr(species = "Homo sapiens", category = "C7")
fgsea_sets_hs_c7 <- m_df %>% split(x = .$gene_symbol, f = .$gs_name)

In [None]:
fgsea_sets_hs_c7[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN"]]

In [None]:
plot_four_gsea_condition("GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN", fgsea_sets_hs_c7)

In [None]:
plot_four_gsea_condition("GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP", fgsea_sets_hs_c7)

In [None]:
plot_four_gsea_condition("GSE9650_NAIVE_VS_EFF_CD8_TCELL_DN", fgsea_sets_hs_c7)

In [None]:
plot_four_gsea_condition("GSE9650_NAIVE_VS_EFF_CD8_TCELL_UP", fgsea_sets_hs_c7)

## C7 pathways

In [None]:
# Run fgsea for the j-th fold-change table against the global `fgsea_sets`.
#
# j  Index into the global vectors `paths_fc` and `dataset_names`.
#
# Tables whose path contains "PR_0_T0" are ranked on the rows whose cluster
# matches "PR_0 T0"; every other table is ranked on the "Keto_1 T0" rows.
# The rank vector is avg_log2FC named by gene, sorted in decreasing order.
#
# Returns the fgsea result with an added `dataset` column holding the file
# name without its ".csv" extension.
gsea_all_pathways <- function(j) {
  print(j)
  cluster_pattern <- if (grepl(paths_fc[j], pattern = "PR_0_T0")) {
    "PR_0 T0"
  } else {
    "Keto_1 T0"
  }
  ranks <- read_csv(paths_fc[j]) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster, pattern = cluster_pattern)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    deframe()
  fgseaRes <- fgsea(pathways = fgsea_sets, stats = ranks)
  # Strip only a literal trailing ".csv"; an unanchored ".csv" regex would
  # also remove any "<char>csv" occurring inside the name.
  fgseaRes$dataset <- sub("\\.csv$", "", dataset_names[j])
  return(fgseaRes)
}

In [None]:
# Run GSEA on every selected table. seq_along() yields an empty index when
# `paths_fc` is empty, whereas 1:length() would iterate over c(1, 0).
gsea <- map(.x = seq_along(paths_fc), .f = gsea_all_pathways)

In [None]:
gseas  <- bind_rows(gsea)

In [None]:
gseas  %>% arrange(padj)

In [None]:
# Create the output folder (and missing parents); stay silent when re-running.
dir.create("../tables/gsea", showWarnings = FALSE, recursive = TRUE)

In [None]:
gseas_df  <- gseas  %>% as.data.frame

In [None]:
# Character copy of `leadingEdge`, so that the original column can be dropped
# before the table is written with write.csv().
# NOTE(review): leadingEdge is presumably a list column (fgsea output); paste()
# then gives one deparsed string per row, e.g. 'c("A", "B")' -- confirm this
# is the format wanted in the CSV.
gseas_df <- mutate(gseas_df, leadingEdge2 = paste(leadingEdge))

In [None]:
gseas_df$leadingEdge  <- NULL

In [None]:
write.csv(gseas_df, "../tables/gsea/gsea_fold_changes_for_heatmap_ketoPR.csv")

In [None]:
gseas_df

## Hallmark pathways

In [None]:
m_df<- msigdbr(species = "Homo sapiens", category = "H")


fgsea_sets<- m_df %>% split(x = .$gene_symbol, f = .$gs_name)

In [None]:
names(fgsea_sets)

In [None]:
# Run fgsea for the j-th fold-change table against the global `fgsea_sets`
# (the Hallmark collection at this point of the notebook).
#
# j  Index into the global vectors `paths_fc` and `dataset_names`.
#
# Tables whose path contains "PR_0_T0" are ranked on the rows whose cluster
# matches "PR_0 T0"; every other table is ranked on the "Keto_1 T0" rows.
# The rank vector is avg_log2FC named by gene, sorted in decreasing order.
#
# Returns the fgsea result with an added `dataset` column holding the file
# name without its ".csv" extension.
gsea_all_pathways <- function(j) {
  print(j)
  cluster_pattern <- if (grepl(paths_fc[j], pattern = "PR_0_T0")) {
    "PR_0 T0"
  } else {
    "Keto_1 T0"
  }
  ranks <- read_csv(paths_fc[j]) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster, pattern = cluster_pattern)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    deframe()
  fgseaRes <- fgsea(pathways = fgsea_sets, stats = ranks)
  # Strip only a literal trailing ".csv"; an unanchored ".csv" regex would
  # also remove any "<char>csv" occurring inside the name.
  fgseaRes$dataset <- sub("\\.csv$", "", dataset_names[j])
  return(fgseaRes)
}

In [None]:
# Run GSEA on every selected table. seq_along() yields an empty index when
# `paths_fc` is empty, whereas 1:length() would iterate over c(1, 0).
gsea <- map(.x = seq_along(paths_fc), .f = gsea_all_pathways)

In [None]:
gseas  <- bind_rows(gsea)

In [None]:
gseas  %>% arrange(padj)

In [None]:
gseas_df  <- gseas  %>% as.data.frame

In [None]:
# Character copy of `leadingEdge`, so that the original column can be dropped
# before the table is written with write.csv().
# NOTE(review): leadingEdge is presumably a list column (fgsea output); paste()
# then gives one deparsed string per row, e.g. 'c("A", "B")' -- confirm this
# is the format wanted in the CSV.
gseas_df <- mutate(gseas_df, leadingEdge2 = paste(leadingEdge))

In [None]:
gseas_df$leadingEdge  <- NULL

In [None]:
write.csv(gseas_df, "../tables/gsea/gsea_fold_changes_for_heatmap_hallmark_ketoPR.csv")

In [None]:
gseas_df

# Heatmap of selected GSEAS

In [None]:
paths_fc  <- list.files("../../240617_VN_Diabetes_V06/tables/gsea/", full.names = T)


In [None]:
paths_fc  <- paths_fc[grepl(paths_fc, pattern = "gsea_fold_changes_for_heatmap")]

In [None]:
paths_fc

In [None]:
test  <- data.table::fread(paths_fc[3])

In [None]:
test

In [None]:
library(data.table)

In [None]:
gseas  <- map(paths_fc, .f = fread)

In [None]:
gseas  <- bind_rows(gseas)

In [None]:
gseas$population  <- substr(gseas$dataset, 1,11)

In [None]:
table(gseas$population)

In [None]:
gseas  <- gseas  %>% mutate(population2 = ifelse(grepl(population, pattern = "_nk_"), substr(population,1,9), population))

In [None]:
gseas$population2  %>% table

In [None]:
# test_type = the last 13 characters of the dataset name (e.g. "_ketoacidosis").
# n1 (name length) and n2 (start position) are kept as helper columns.
gseas <- gseas %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )


In [None]:
gseas$test_type  %>% table

In [None]:
gseas

In [None]:
gs  <- gseas  %>% group_by(dataset)  %>% slice_max(order_by = -log10(padj), n = 10)

In [None]:
write.csv(gs, "../tables/gsea/v07heatmap_top_pathways.csv")

In [None]:
selected_pathways  <- c('HALLMARK_TNFA_SIGNALING_VIA_NFKB',
'HALLMARK_OXIDATIVE_PHOSPHORYLATION',

'HALLMARK_INTERFERON_ALPHA_RESPONSE',
'HALLMARK_INTERFERON_GAMMA_RESPONSE',
                        "GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP",
                        "GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP",
                        
"GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP",
"GOLDRATH_NAIVE_VS_MEMORY_CD8_TCELL_DN",
'GOLDRATH_EFF_VS_MEMORY_CD8_TCELL_UP',
'GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN'
)

Check and correct the population names

In [None]:
# Population labels expected in `population2`. They are the first 11
# characters of the dataset name (9 for the NK subset), so they must be
# spelled exactly like the x-axis factor levels used in the heatmaps
# ("cd4_l3_th1t", not "cd4_l3_th1t7").
known_populations <- c(
  "cd8_l3_prol",
  "cd8_l3_tcm_",
  "cd4_l3_th1t",
  "cd4_l3_temr",
  "cd8_l3_temr",
  "cd4_l3_tfh_",
  "cd8_l3_naiv",
  "cd4_l3_naiv",
  "cd4_l3_th2_",
  "cd4_l3_nfkb",
  "cd4_l2_unc_",
  "cd4_l3_treg",
  "cd4_l3_isag",
  "cd4_l3_prol",
  "cd8_l3_tem_",
  "cd8_l2_nk",
  "cd8_l2_unc_"
)

# Show every observed population label that is not in the expected list.
setdiff(names(table(gseas$population2)), known_populations)

Plot and save the results. 

In [None]:
options(repr.plot.height = 8, repr.plot.width = 24)

# Order of the populations on the x axis.
population_levels <- c(
  "cd8_l3_prol",
  "cd8_l3_tcm_",
  "cd4_l3_th1t",
  "cd4_l3_temr",
  "cd8_l3_temr",
  "cd4_l3_tfh_",
  "cd8_l3_naiv",
  "cd4_l3_naiv",
  "cd4_l3_th2_",
  "cd4_l3_nfkb",
  "cd4_l2_unc_",
  "cd4_l3_treg",
  "cd4_l3_isag",
  "cd4_l3_prol",
  "cd8_l3_tem_",
  "cd8_l2_nk",
  "cd8_l2_unc_"
)
# Full-dataset and subcluster-level results are left out of the heatmap.
excluded_populations <- c("cd8_l1_full", "cd4_l1_full", "cd4_l2_subc", "cd8_l2_subc")

# Dot heatmap of the selected pathways: colour = NES, size = -log(padj),
# one facet column per test type; non-significant results are transparent.
gseas %>%
  dplyr::filter(pathway %in% selected_pathways &
    !(population2 %in% excluded_populations)) %>%
  ggplot(aes(x = factor(population2, levels = population_levels), y = pathway)) +
  geom_point(aes(size = -log(padj), color = NES, alpha = padj < 0.05)) +
  # Named values: with unnamed c(0, 1) a data set containing only TRUE would
  # map TRUE to the first value (0) and hide every significant point.
  scale_alpha_manual(values = c("FALSE" = 0, "TRUE" = 1)) +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradient2(low = "blue", mid = "white", high = "red") +
  xlab("") + ylab("") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.ticks.x = element_blank()
  ) +
  ggtheme()
ggsave("../figures/gsea/heatmap_gsea.svg", width = 55, height = 16, units = "cm", create.dir = TRUE)

For revisions, let's plot it separately for CD4 and CD8. 

### CD8

In [None]:
options(repr.plot.height = 8, repr.plot.width = 24)

# Order of the populations on the x axis (CD4 levels stay unused here).
population_levels <- c(
  "cd8_l3_prol",
  "cd8_l3_tcm_",
  "cd4_l3_th1t",
  "cd4_l3_temr",
  "cd8_l3_temr",
  "cd4_l3_tfh_",
  "cd8_l3_naiv",
  "cd4_l3_naiv",
  "cd4_l3_th2_",
  "cd4_l3_nfkb",
  "cd4_l2_unc_",
  "cd4_l3_treg",
  "cd4_l3_isag",
  "cd4_l3_prol",
  "cd8_l3_tem_",
  "cd8_l2_nk",
  "cd8_l2_unc_"
)
# Full-dataset and subcluster-level results are left out of the heatmap.
excluded_populations <- c("cd8_l1_full", "cd4_l1_full", "cd4_l2_subc", "cd8_l2_subc")

# CD8-only dot heatmap of the selected pathways: colour = NES,
# size = -log(padj); non-significant results are transparent.
gseas %>%
  dplyr::filter(pathway %in% selected_pathways &
    !(population2 %in% excluded_populations)) %>%
  dplyr::filter(grepl(population2, pattern = "cd8")) %>%
  ggplot(aes(x = factor(population2, levels = population_levels), y = pathway)) +
  geom_point(aes(size = -log(padj), color = NES, alpha = padj < 0.05)) +
  # Named values: with unnamed c(0, 1) a data set containing only TRUE would
  # map TRUE to the first value (0) and hide every significant point.
  scale_alpha_manual(values = c("FALSE" = 0, "TRUE" = 1)) +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradient2(low = "blue", mid = "white", high = "red") +
  xlab("") + ylab("") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.ticks.x = element_blank()
  ) +
  ggtheme()
ggsave("../figures/gsea/heatmap_gsea_cd8.svg", width = 55, height = 16, units = "cm", create.dir = TRUE)

### CD4

In [None]:
options(repr.plot.height = 8, repr.plot.width = 24)

# Order of the populations on the x axis (CD8 levels stay unused here).
population_levels <- c(
  "cd8_l3_prol",
  "cd8_l3_tcm_",
  "cd4_l3_th1t",
  "cd4_l3_temr",
  "cd8_l3_temr",
  "cd4_l3_tfh_",
  "cd8_l3_naiv",
  "cd4_l3_naiv",
  "cd4_l3_th2_",
  "cd4_l3_nfkb",
  "cd4_l2_unc_",
  "cd4_l3_treg",
  "cd4_l3_isag",
  "cd4_l3_prol",
  "cd8_l3_tem_",
  "cd8_l2_nk",
  "cd8_l2_unc_"
)

# CD4-only dot heatmap of the selected pathways: colour = NES,
# size = -log(padj); non-significant results are transparent.
# The exclusion of the full/subcluster populations is disabled in this cell:
#&
                        #((population2 %in% c("cd8_l1_full","cd4_l1_full","cd4_l2_subc","cd8_l2_subc")) == F))  %>%
gseas %>%
  dplyr::filter(pathway %in% selected_pathways) %>%
  dplyr::filter(grepl(population2, pattern = "cd4")) %>%
  ggplot(aes(x = factor(population2, levels = population_levels), y = pathway)) +
  geom_point(aes(size = -log(padj), color = NES, alpha = padj < 0.05)) +
  # Named values: with unnamed c(0, 1) a data set containing only TRUE would
  # map TRUE to the first value (0) and hide every significant point.
  scale_alpha_manual(values = c("FALSE" = 0, "TRUE" = 1)) +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradient2(low = "blue", mid = "white", high = "red") +
  xlab("") + ylab("") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.ticks.x = element_blank()
  ) +
  ggtheme()
# Saved under its own name; writing to "heatmap_gsea_cd8.svg" here would
# overwrite the CD8 figure.
ggsave("../figures/gsea/heatmap_gsea_cd4.svg", width = 55, height = 16, units = "cm", create.dir = TRUE)

In [None]:
# Interferon / effector pathways to inspect across all populations.
selected_pathways <- c(
  "HALLMARK_INTERFERON_ALPHA_RESPONSE",
  "HALLMARK_INTERFERON_GAMMA_RESPONSE",
  "GSE13485_DAY7_VS_DAY21_YF17D_VACCINE_PBMC_UP",
  "GOLDRATH_EFF_VS_MEMORY_CD8_TCELL_UP",
  "GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN"
)

options(repr.plot.height = 5, repr.plot.width = 24)

# Dot heatmap: colour = NES, size = -log(padj), pathways in the order given
# above; non-significant results are transparent.
gseas %>%
  dplyr::filter(pathway %in% selected_pathways) %>%
  ggplot(aes(x = population, y = factor(pathway, levels = selected_pathways))) +
  geom_point(aes(size = -log(padj), color = NES, alpha = padj < 0.05)) +
  # Named values: with unnamed c(0, 1) a data set containing only TRUE would
  # map TRUE to the first value (0) and hide every significant point.
  scale_alpha_manual(values = c("FALSE" = 0, "TRUE" = 1)) +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradient2(low = "blue", mid = "white", high = "red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtheme()

In [None]:
# CD4 activation / differentiation pathways to inspect across all populations.
selected_pathways <- c(
  "GSE25087_FETAL_VS_ADULT_TCONV_UP",
  "GSE36476_CTRL_VS_TSST_ACT_40H_MEMORY_CD4_TCELL_YOUNG_DN",
  "GSE22886_NAIVE_CD4_TCELL_VS_48H_ACT_TH2_DN",
  "GSE24634_NAIVE_CD4_TCELL_VS_DAY7_IL4_CONV_TREG_UP",
  "GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP",
  "GSE36476_CTRL_VS_TSST_ACT_72H_MEMORY_CD4_TCELL_YOUNG_UP"
)

options(repr.plot.height = 5, repr.plot.width = 24)

# Dot heatmap: colour = NES, size = -log(padj), pathways in the order given
# above; non-significant results are transparent.
gseas %>%
  dplyr::filter(pathway %in% selected_pathways) %>%
  ggplot(aes(x = population, y = factor(pathway, levels = selected_pathways))) +
  geom_point(aes(size = -log(padj), color = NES, alpha = padj < 0.05)) +
  # Named values: with unnamed c(0, 1) a data set containing only TRUE would
  # map TRUE to the first value (0) and hide every significant point.
  scale_alpha_manual(values = c("FALSE" = 0, "TRUE" = 1)) +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradient2(low = "blue", mid = "white", high = "red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtheme()

In [None]:
gseas  %>% dplyr::filter(test_type == "_ketoacidosis" & padj < 0.05 & NES > 0)  %>% 
group_by(pathway)  %>% tally()  %>% arrange(desc(n))

In [None]:
gseas  %>% dplyr::filter(pathway %in% c("HALLMARK_INTERFERON_ALPHA_RESPONSE"))

# DE genes visualization volcano plot

In [None]:
library(tidytext)

In [None]:
all_markers$dataset  %>% table

In [None]:
all_markers  %>% 
dplyr::filter(source == "scRNAseq_RNA")  %>% 
dplyr::filter(test_type == "Dia T0 vs Ctrl T0" &
             dataset == "cd4_l3_proliferating")  %>% 
mutate(abs_avg_log2fc = abs(avg_log2FC))  %>% 
group_by( direction)  %>% 
slice_max(n = 10, order_by = abs_avg_log2fc) 

In [None]:
test  <- all_markers  %>% 
dplyr::filter(source == "scRNAseq_RNA")  %>% 
group_by(test_type, dataset)  %>% tally  %>% 
pivot_wider(names_from = dataset, values_from = n)

In [None]:
write.csv(test, "../tables/fc_test_counts.csv")

In [None]:
options(repr.plot.width = 24, repr.plot.height = 68)

# Dot plot of the 50 genes with the largest |avg_log2FC| per test type and
# direction (scRNAseq RNA markers only). Rows are faceted by test type,
# columns by dataset; point size is -log10(adjusted p), colour the direction.
all_markers %>%
  dplyr::filter(source == "scRNAseq_RNA") %>%
  mutate(abs_avg_log2fc = abs(avg_log2FC)) %>%
  group_by(test_type, direction) %>%
  slice_max(n = 50, order_by = abs_avg_log2fc) %>%
  ggplot(aes(x = dataset, y = reorder_within(gene, avg_log2FC, test_type))) +
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = direction)) +
  facet_grid(
    rows = vars(test_type), cols = vars(dataset),
    scales = "free", space = "free"
  ) +
  # reorder_within() is used on the y aesthetic, so the matching
  # scale_y_reordered() is needed to strip its "___<test_type>" suffix
  # from the gene labels.
  scale_y_reordered() +
  theme_bw() +
  scale_size_continuous(range = c(1, 4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
list.files("../tables/fold_change/")

In [None]:
# Fold-change tables of the current analysis: full paths and the matching
# bare file names (same order, one entry per file).
paths_fc <- list.files(path = "../tables/fold_change", full.names = TRUE)
dataset_names <- basename(paths_fc)


In [None]:
# Read the j-th fold-change table.
#
# j  Index into the global vectors `paths_fc` and `dataset_names`.
#
# Returns the table as a data.frame sorted by decreasing avg_log2FC, with a
# `dataset` column holding the file name without its ".csv" extension.
read_fc <- function(j) {
  fc.df <- read_csv(paths_fc[j]) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC))
  # Strip only a literal trailing ".csv"; an unanchored ".csv" regex would
  # also remove any "<char>csv" occurring inside the name.
  fc.df$dataset <- sub("\\.csv$", "", dataset_names[j])

  return(fc.df)
}

In [None]:
paths_fc

In [None]:
# Read every fold-change table. seq_along() yields an empty index when
# `paths_fc` is empty, whereas 1:length() would iterate over c(1, 0).
all_fcs <- map(seq_along(paths_fc), .f = read_fc)

In [None]:
all_fcs2  <- bind_rows(all_fcs)

In [None]:
all_fcs2  %>% head

In [None]:
all_fcs2$`...1`  <- NULL

In [None]:
all_fcs2$dataset   <- gsub(all_fcs2$dataset, pattern = "full_filt", replacement = "full")

In [None]:
all_fcs2  %>% dplyr::filter(grepl(dataset, pattern = "cd4_l1_full"))  %>% 
group_by(dataset) %>% tally

In [None]:
all_fcs3

In [None]:
# Number of rows per comparison (rows) and dataset (columns), written to CSV.
# NOTE(review): in the visible part of this notebook `all_fcs3` is first
# assigned further down (volcano-plot section), so this cell appears to rely
# on out-of-order execution -- confirm before running top to bottom.
test2 <- all_fcs3 %>%
  group_by(Comparison, Dataset) %>%
  tally() %>%
  pivot_wider(names_from = Dataset, values_from = n)

write.csv(test2, file = "../tables/fc_test_count2.csv")

In [None]:
all_fcs2$dataset   <- gsub(all_fcs2$dataset, pattern = "_Ctrl_vs_DiaT0", replacement = "--DiaT0 vs. Ctrl")

In [None]:
all_fcs2  %>% dplyr::filter(grepl(dataset, pattern = "cd4_l1_full"))  %>% 
group_by(dataset) %>% tally

In [None]:
gsub(grep(all_fcs2$dataset,pattern = "0_T1_vs_PR_1_T1", value = T)  %>% head, 
     pattern = "_PR_0_T1_vs_PR_1_T1", replacement = "--PR0_T1 vs. PR1_T1")

In [None]:
# Replace the comparison suffix of every dataset name by "--<label>", so the
# name can later be split into dataset and comparison at "--".
# Applied in the listed order.
comparison_labels <- c(
  "_Ctrl_vs_DiaT1"      = "--DiaT1 vs. Ctrl",
  "_ketoacidosis"       = "--Keto+ vs. Keto-",
  "_PR_0_T0_vs_PR_0_T1" = "--PR0_T0 vs. PR0_T1",
  "_PR_0_T0_vs_PR_1_T0" = "--PR0_T0 vs. PR1_T0",
  "_PR_1_T0_vs_PR_1_T1" = "--PR1_T0 vs. PR1_T1",
  "_Ctrl_vs_DiaT0"      = "--DiaT0 vs. Ctrl",
  "_DiaT0_vs_DiaT1"     = "--DiaT0 vs. DiaT1"
)
for (suffix in names(comparison_labels)) {
  all_fcs2$dataset <- gsub(
    pattern = suffix,
    replacement = comparison_labels[[suffix]],
    x = all_fcs2$dataset
  )
}

In [None]:
all_fcs2$dataset   <- gsub(all_fcs2$dataset, pattern = "_PR_0_T1_vs_PR_1_T1", replacement = "--PR0_T1 vs. PR1_T1")

In [None]:
all_fcs2  %>% dplyr::filter(grepl(dataset, pattern = "cd4_l1_full"))  %>% 
group_by(dataset) %>% tally

In [None]:
all_fcs2  <- all_fcs2  %>% separate(dataset, into = c("Dataset", "Comparison"), sep = "--", remove = F)

In [None]:
all_fcs2  <- all_fcs2  %>% dplyr::filter(!is.na(Comparison))

In [None]:
dataset_comparison  <- levels(factor(all_fcs2$dataset))

In [None]:
all_fcs2$Comparison  %>% table

In [None]:
all_fcs2$dataset  %>% table

In [None]:
all_fcs2$Comparison  %>% table

In [None]:
# For each comparison: the cluster whose markers are labelled "Up" and the
# cluster whose markers are labelled "Down".
up_cluster <- c(
  "DiaT1 vs. Ctrl"    = "Dia T1",
  "DiaT0 vs. Ctrl"    = "Dia T0",
  "DiaT0 vs. DiaT1"   = "Dia T0",
  "PR0_T0 vs. PR0_T1" = "PR_0 T0",
  "PR0_T0 vs. PR1_T0" = "PR_0 T0",
  "PR0_T1 vs. PR1_T1" = "PR_0 T1",
  "PR1_T0 vs. PR1_T1" = "PR_1 T0",
  "Keto+ vs. Keto-"   = "Keto_1 T0"
)
down_cluster <- c(
  "DiaT1 vs. Ctrl"    = "Ctrl T0",
  "DiaT0 vs. Ctrl"    = "Ctrl T0",
  "DiaT0 vs. DiaT1"   = "Dia T1",
  "PR0_T0 vs. PR0_T1" = "PR_0 T1",
  "PR0_T0 vs. PR1_T0" = "PR_1 T0",
  "PR0_T1 vs. PR1_T1" = "PR_1 T1",
  "PR1_T0 vs. PR1_T1" = "PR_1 T1",
  "Keto+ vs. Keto-"   = "Keto_0 T0"
)

# Rows matching neither cluster of their comparison keep direction = NA.
all_fcs2 <- all_fcs2 %>%
  mutate(direction = case_when(
    cluster == unname(up_cluster[Comparison]) ~ "Up",
    cluster == unname(down_cluster[Comparison]) ~ "Down"
  ))

In [None]:
gc()

In [None]:
head(all_fcs2)

In [None]:
dataset_list  <- levels(factor(all_fcs2$dataset))


In [None]:
dataset_list

In [None]:
i = dataset_list[1]

In [None]:
all_fcs2  %>% dplyr::filter(dataset == i & p_val_adj < 0.01)  %>% 
    group_by(direction)  

In [None]:
Comparison_list  <- levels(factor(all_fcs2$Comparison))

In [None]:
i = Comparison_list[1]

In [None]:
genes  <- all_fcs2  %>% dplyr::filter(Comparison == i & p_val_adj < 0.01)  %>% 
    group_by(direction, Dataset)  %>% slice_max(n = 10, order_by = avg_log2FC)   %>% pull(gene)

In [None]:
genes

In [None]:
all_fcs2$signif = ifelse(all_fcs2$p_val_adj < 0.05, "y", "n")

In [None]:
all_fcs2  <- all_fcs2  %>% mutate(p_val_adj = if_else(p_val_adj == 0, 1e-294,p_val_adj))

In [None]:
library(tidytext)

In [None]:
Datasets_list  <- levels(factor(all_fcs2$Dataset))

In [None]:
i = Datasets_list[1]

In [None]:
all_fcs2  %>% head

In [None]:
 genes  <- all_fcs2  %>% dplyr::filter(Dataset == i & Comparison == "DiaT0 vs. Ctrl")  %>% 
    group_by(direction)  %>% slice_max(n = 10, order_by = avg_log2FC)   %>% pull(gene)
    df  <- all_fcs2  %>% dplyr::filter(Dataset == i & direction == "Up" & gene %in% genes & 
                                       Comparison == "DiaT0 vs. Ctrl"                                      )

In [None]:
genes

In [None]:
df

In [None]:
j = 0


In [None]:
plot_list[[i]]

In [None]:
plot_list

In [None]:
Comaprisons_list  <- levels(factor(all_fcs2$Comparison  ))

In [None]:
# Create the PDF output folder including its missing parent folders; stay
# silent when it already exists.
dir.create("../figures/DE_genes/pdf/", showWarnings = FALSE, recursive = TRUE)

In [None]:
# One dot plot per dataset for comparison `j` (top genes by avg_log2FC),
# collected into a titled grid and written to a single PDF page.
j <- Comparison_list[2]

plot_list <- list()

for (i in Datasets_list) {
  # Ten genes with the highest avg_log2FC per direction in this dataset.
  genes <- all_fcs2 %>%
    dplyr::filter(Dataset == i & Comparison == j) %>%
    group_by(direction) %>%
    slice_max(n = 10, order_by = avg_log2FC) %>%
    pull(gene)
  # Only the "Up" rows of those genes are plotted.
  df <- all_fcs2 %>%
    dplyr::filter(Dataset == i & direction == "Up" &
      gene %in% genes & Comparison == j)
  # The empty scale_size_manual() call was dropped: scale_size_continuous()
  # below replaced it anyway.
  p <- df %>%
    ggplot(aes(x = Dataset, y = reorder(gene, avg_log2FC))) +
    geom_point(aes(size = -log(p_val_adj, base = 10), colour = avg_log2FC < 0)) +
    #facet_grid(rows = vars(test_type), cols = vars(dataset),
    #           scales = "free", space = "free") +
    scale_x_reordered() +
    theme_bw() +
    scale_size_continuous(range = c(1, 4)) +
    #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    ) +
    NoLegend() + ylab("") + xlab("") +
    ggtitle(i)
  plot_list[[i]] <- p
}

pdf(file = paste0("../figures/DE_genes/pdf/", j, ".pdf"), width = 15, height = 18)
title <- ggdraw() +
  draw_label(
    j,
    fontface = "bold",
    x = 0,
    hjust = 0
  ) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 7)
  )
# Print explicitly so the page is drawn on the open pdf device regardless of
# how the front end handles top-level values.
print(plot_grid(
  title, cowplot::plot_grid(plotlist = plot_list, ncol = 7),
  ncol = 1,
  # rel_heights values control vertical title margins
  rel_heights = c(0.05, 1)
))
dev.off()
    



In [None]:
all_fcs2  %>% dplyr::filter(Comparison == j)  %>% 
  ggplot(aes(x = avg_log2FC, y = -log10(p_val_adj), color = Dataset)) + 
  geom_point() + 
        theme_minimal() +
        #geom_text_repel() +
        #scale_color_manual(values=c("blue", "black", "red")) +
        geom_vline(xintercept=c(-0.6, 0.6), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red")

In [None]:
library(ggrepel)
# plot adding up all layers we have seen so far
       

In [None]:
head(all_fcs2)

In [None]:
gc()

In [None]:
rm(all_fcs)

In [None]:
ls()

In [None]:
# Volcano-plot table: keep rows with positive avg_log2FC, mirror the "Down"
# rows to negative values, and derive label/colour columns.
# The comparison belongs inside filter(); `filter(avg_log2FC) > 0 %>% ...`
# pipes the literal 0 into mutate() and fails.
all_fcs3 <- all_fcs2 %>%
  dplyr::filter(avg_log2FC > 0) %>%
  mutate(avg_log2FC2 = ifelse(direction == "Down", avg_log2FC * -1, avg_log2FC)) %>%
  mutate(abs_avg_log2FC2 = abs(avg_log2FC2)) %>%
  # Label genes with |log2FC| > 0.9 and adjusted p < 0.05.
  mutate(label = ifelse(abs_avg_log2FC2 > 0.9 & p_val_adj < 0.05, gene, NA_character_)) %>%
  # Colour by dataset when |log2FC| > 0.6 and adjusted p < 0.05, grey otherwise.
  mutate(color = ifelse(abs_avg_log2FC2 > 0.6 & p_val_adj < 0.05, Dataset, "grey"))

In [None]:
all_fcs3  %>% head

In [None]:
# Volcano-plot table: positive avg_log2FC rows only, genes starting with
# "MTRN" removed, "Down" rows mirrored to negative values.
# Labelled: |log2FC| > 1 with adjusted p < 0.05, or |log2FC| > 0.6 with
# adjusted p < 10e-100 (note: 10e-100 equals 1e-99).
# Coloured by dataset when |log2FC| > 0.6 and adjusted p < 0.05, else "grey".
all_fcs3 <- all_fcs2 %>%
  dplyr::filter(avg_log2FC > 0, !grepl("^MTRN", gene)) %>%
  mutate(
    avg_log2FC2 = ifelse(direction == "Down", avg_log2FC * -1, avg_log2FC),
    abs_avg_log2FC2 = abs(avg_log2FC2),
    label = ifelse(
      abs_avg_log2FC2 > 1 & p_val_adj < 0.05,
      gene,
      ifelse(abs_avg_log2FC2 > 0.6 & p_val_adj < 10e-100, gene, NA_character_)
    ),
    color = ifelse(abs_avg_log2FC2 > 0.6 & p_val_adj < 0.05, Dataset, "grey")
  )

In [None]:
all_fcs3  %>% head

In [None]:
all_fcs3  %>% dplyr::filter(Comparison == j)  %>% 
  ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)) + 
  geom_point() + 
        theme_minimal() +
        geom_text_repel() +
        #scale_color_manual(values=c("blue", "black", "red")) +
        geom_vline(xintercept=c(-0.6, 0.6), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red")

In [None]:
all_fcs3  %>% dplyr::filter(Comparison == j)  %>% 
  ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)) + 
  geom_point() + 
        theme_minimal() +
        geom_text_repel() +
        #scale_color_manual(values=c("blue", "black", "red")) +
        geom_vline(xintercept=c(-0.6, 0.6), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red")

In [None]:
all_fcs3$Dataset  %>% table  %>% length

In [None]:
options(repr.plot.width = 13, repr.plot.height = 7)
all_fcs3  %>% dplyr::filter(Comparison == j)  %>% 
  ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label, shape = color)) + 
  geom_point() + 
        theme_minimal() +
        geom_text_repel() +
        scale_color_manual(values=c(scales::hue_pal()(17), "grey88")) +
        geom_vline(xintercept=c(-0.6, 0.6), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red") +
    scale_shape_manual(values = c(21:25,21:25,21:25,21:25))

In [None]:
options(repr.plot.width = 13, repr.plot.height = 7)
all_fcs3  %>% dplyr::filter(Comparison == j)  %>% 
  ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label, shape = color)) + 
  geom_point() + 
        theme_minimal() +
        geom_text_repel() +
        scale_color_manual(values=c(scales::hue_pal()(17), "grey88")) +
        geom_vline(xintercept=c(-0.6, 0.6), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red") +
    scale_shape_manual(values = c(21:25,21:25,21:25,21:25))

In [None]:
all_fcs3$cd4_vs_cd8  <- substr(all_fcs3$Dataset,1,3)

In [None]:
options(repr.plot.width = 18, repr.plot.height = 6)
df10  <- all_fcs3  %>% dplyr::filter(Comparison == j) 
df10  %>%  ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label, shape = color)) + 
  geom_point() + 
        theme_minimal() +
        geom_text_repel(size = 6) +
facet_wrap(~cd4_vs_cd8) +
        scale_color_manual(values=c(scales::hue_pal()(length(levels(factor(df10$color)))-1), "grey88")) +
        geom_vline(xintercept=c(-0.6, 0.6), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red") +
    scale_shape_manual(values = c(21:25,21:25,21:25,21:25)) + 
ggtitle(j) + ggtheme()

In [None]:
all_fcs3  <- all_fcs2  %>% 
dplyr::filter(avg_log2FC>0 & !grepl(gene, pattern = "^MTRN"))  %>% 
mutate(avg_log2FC2 = ifelse(direction == "Down",avg_log2FC*-1,avg_log2FC))  %>% 
mutate(abs_avg_log2FC2 = abs(avg_log2FC2))  %>% 
mutate(label = ifelse(abs_avg_log2FC2>0.9&p_val_adj<0.05,gene,
                     ifelse(abs_avg_log2FC2>0.6&p_val_adj<10e-100,gene,NA_character_)))  %>% 
mutate(color = ifelse(abs_avg_log2FC2>0.6&p_val_adj<0.05,Dataset,"grey"))       

In [None]:
all_fcs3$cd4_vs_cd8  <- substr(all_fcs3$Dataset,1,3)

In [None]:
options(repr.plot.width = 22, repr.plot.height = 8)

for(j in Comparison_list){
    df10  <- all_fcs3  %>% dplyr::filter(Comparison == j) 
p  <- df10  %>%  ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)) + 
  ggrastr::rasterize(geom_point(aes(shape = color), size = 2)) + 
        theme_minimal() +
        geom_text_repel(size = 6) +
facet_wrap(~cd4_vs_cd8) +
        scale_color_manual(values=c(scales::hue_pal()(length(levels(factor(df10$color)))-1), "grey88")) +
        geom_vline(xintercept=c(-0.6, 0.6), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red") +
    
    scale_shape_manual(values = c(21:25,21:25,21:25,21:25)) + 
ggtitle(j) + ggtheme()
    print(p)
}

In [None]:
for(j in Comparison_list){
    df10  <- all_fcs3  %>% dplyr::filter(Comparison == j) 
 print(df10  %>%  group_by(Dataset)  %>% tally)
 
}

### Correct Colors

In [None]:
colors_of_populations = c("Mazda RX4" = "red", "Mazda RX4 Wag" = "blue", "Datsun 710" = "green")

In [None]:
levels(factor(all_fcs3$Dataset))

In [None]:
# Volcano-plot table with a |log2FC| cut-off of 0.585: positive avg_log2FC
# rows only, genes starting with "MTRN" removed, "Down" rows mirrored to
# negative values. Genes passing the cut-off with adjusted p < 0.05 are
# labelled and coloured by dataset; everything else is unlabelled and "grey".
# (The second, stricter p-value branch of the original label rule could never
# add a label beyond the first one, so a single condition is used.)
all_fcs3 <- all_fcs2 %>%
  dplyr::filter(avg_log2FC > 0, !grepl("^MTRN", gene)) %>%
  mutate(
    avg_log2FC2 = ifelse(direction == "Down", avg_log2FC * -1, avg_log2FC),
    abs_avg_log2FC2 = abs(avg_log2FC2),
    label = ifelse(abs_avg_log2FC2 > 0.585 & p_val_adj < 0.05, gene, NA_character_),
    color = ifelse(abs_avg_log2FC2 > 0.585 & p_val_adj < 0.05, Dataset, "grey"),
    cd4_vs_cd8 = substr(Dataset, 1, 3)
  )

In [None]:
# Colour per dataset for the volcano plots; "grey" is the colour of points
# below the significance / fold-change cut-offs.
# NOTE(review): "cd8_l3_naive" and "cd8_l3_tem" share "#004455ff" -- confirm
# this is intended (the l1_full / l2_subcluster pairs also share a colour).
colors_of_populations <- c(
  "cd4_l1_full"          = "#ffa66aff",
  "cd4_l2_subcluster"    = "#ffa66aff",
  "cd4_l2_unc"           = "#cd3333ff",
  "cd4_l3_isaghi"        = "#a46464ff",
  "cd4_l3_naive"         = "#a58413ff",
  "cd4_l3_nfkb"          = "#a46c43ff",
  "cd4_l3_proliferating" = "#9b3097ff",
  "cd4_l3_temra"         = "#7d252aff",
  "cd4_l3_tfh"           = "#8e6a1dff",
  "cd4_l3_th1th17"       = "#a94e4eff",
  "cd4_l3_th2"           = "#b36672ff",
  "cd4_l3_treg"          = "#ba6d8dff",
  "cd8_l1_full"          = "#67934aff",
  "cd8_l2_nk"            = "#105292ff",
  "cd8_l2_subcluster"    = "#67934aff",
  "cd8_l2_unc"           = "#6d00c0ff",
  "cd8_l3_naive"         = "#004455ff",
  "cd8_l3_prolif"        = "#5f711aff",
  "cd8_l3_tcm"           = "#427c39ff",
  "cd8_l3_tem"           = "#004455ff",
  "cd8_l3_temra"         = "#16481fff",
  "grey"                 = "grey88"
)

In [None]:
for(j in Comparison_list){
    df10  <- all_fcs3  %>% dplyr::filter(Comparison == j & !(Dataset %in% c("cd4_l1_full", "cd8_l2_subcluster", 
                                                                            "cd4_l2_subcluster", "cd8_l1_full",
                                                                           "cd8_l2_nk"))) 
p  <- df10  %>%  ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)) + 
  ggrastr::rasterize(geom_point(aes(shape = color), size = 2)) + 
        theme_minimal() +
        geom_text_repel(size = 6) +
facet_wrap(~cd4_vs_cd8) +
        scale_color_manual(values=colors_of_populations) +
        geom_vline(xintercept=c(-0.585, 0.585), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red") +
    
    scale_shape_manual(values = c(21:25,21:25,21:25,21:25)) + 
ggtitle(j) + ggtheme()
    print(p)
}

In [None]:
all_fcs4  <- all_fcs2  %>% 
dplyr::filter(avg_log2FC>0 & !grepl(gene, pattern = "^MTRN"))  %>% 
mutate(avg_log2FC2 = ifelse(direction == "Down",avg_log2FC*-1,avg_log2FC))  %>% 
mutate(abs_avg_log2FC2 = abs(avg_log2FC2))  %>% 
mutate(label = ifelse(abs_avg_log2FC2>0.322&p_val_adj<0.05,gene,
                     ifelse(abs_avg_log2FC2>0.322&p_val_adj<10e-100,gene,NA_character_)))  %>% 
mutate(color = ifelse(abs_avg_log2FC2>0.322&p_val_adj<0.05,Dataset,"grey"))   %>% 
mutate(cd4_vs_cd8 = substr(Dataset,1,3))

In [None]:
options(repr.plot.width = 12, repr.plot.height = 6)

for(j in Comparison_list){
    df10  <- all_fcs4  %>% dplyr::filter(Comparison == j & (Dataset %in% c("cd4_l1_full", "cd8_l2_subcluster", 
                                                                          "cd8_l2_nk"))) 
p  <- df10  %>%  ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)) + 
  ggrastr::rasterize(geom_point(aes(shape = color), size = 2)) + 
        theme_minimal() +
        geom_text_repel(size = 6) +
        scale_color_manual(values=colors_of_populations) +
        geom_vline(xintercept=c(-0.322, 0.322), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red") +
    
    scale_shape_manual(values = c(21:25,21:25,21:25,21:25)) + 
ggtitle(j) + ggtheme()
    print(p)
}

### Without Y genes

In [None]:
library(biomaRt)
mart <- useMart(biomart="ensembl", dataset="hsapiens_gene_ensembl")

In [None]:
results <- getBM(attributes = c("chromosome_name", "hgnc_symbol"),
           filters = "chromosome_name", values = "Y", mart = mart)

In [None]:
results

In [None]:
# Volcano-plot table with a |log2FC| cut-off of 0.322, without chromosome-Y
# genes (`results` from the biomaRt query) and without genes whose symbol
# starts with MTRN, HLA-, AP0 or H1-, or contains "orf".
# "Down" rows are mirrored to negative values. Genes passing the cut-off with
# adjusted p < 0.05 are labelled and coloured by dataset; the rest is "grey".
# (The second, stricter p-value branch of the original label rule could never
# add a label beyond the first one, so a single condition is used.)
all_fcs4 <- all_fcs2 %>%
  dplyr::filter(
    avg_log2FC > 0,
    !grepl("^MTRN|^HLA-|orf|^AP0|^H1-", gene)
  ) %>%
  dplyr::filter(!(gene %in% results$hgnc_symbol)) %>%
  mutate(
    avg_log2FC2 = ifelse(direction == "Down", avg_log2FC * -1, avg_log2FC),
    abs_avg_log2FC2 = abs(avg_log2FC2),
    label = ifelse(abs_avg_log2FC2 > 0.322 & p_val_adj < 0.05, gene, NA_character_),
    color = ifelse(abs_avg_log2FC2 > 0.322 & p_val_adj < 0.05, Dataset, "grey"),
    cd4_vs_cd8 = substr(Dataset, 1, 3)
  )

In [None]:
# Inspect the rows of the filtered table that belong to the gene ASCL2.
all_fcs4  %>% dplyr::filter(gene == "ASCL2")

In [None]:
# Volcano plots for the cd4_l1_full and cd8_l2_subcluster datasets, one per
# entry of Comparison_list; each plot is shown and written to an SVG file.
options(repr.plot.width = 12, repr.plot.height = 6)

# The output folder is only created further down in the notebook, so make sure
# it exists before the first ggsave() call (no-op when it is already there).
dir.create("../figures/volcano", recursive = TRUE, showWarnings = FALSE)

for (j in Comparison_list) {
  # Rows of the current comparison restricted to the two listed datasets.
  df10 <- all_fcs4 %>%
    dplyr::filter(Comparison == j &
                    (Dataset %in% c("cd4_l1_full", "cd8_l2_subcluster")))

  p <- df10 %>%
    ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj),
               color = color, label = label)) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2), dpi = 300) +
    theme_minimal() +
    # Red guides sit at the |log2FC| = 0.322 and adjusted p = 0.05 cut-offs.
    geom_vline(xintercept = c(-0.322, 0.322), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    geom_text_repel(size = 3) +
    scale_color_manual(values = colors_of_populations) +
    scale_shape_manual(values = c(21:25, 21:25, 21:25, 21:25)) +
    ggtitle(j) +
    ggtheme()

  print(p)
  # Pass the plot explicitly instead of relying on ggplot2's last_plot().
  ggsave(paste0("../figures/volcano/cd4_and_cd8_",j,".svg"),
         plot = p, width = 10, height = 6)
}

In [None]:
library(ggrepel)

In [None]:
# Same table as all_fcs4 but with a looser fold-change cut-off (0.2 instead of
# 0.322) for labelling and colouring.
# Rows are kept only when avg_log2FC is positive, the gene symbol does not
# match one of the excluded patterns, and the symbol is not among the HGNC
# symbols returned by the chromosome-Y BioMart query (`results`).
all_fcs5 <- all_fcs2 %>%
  dplyr::filter(
    avg_log2FC > 0 &
      !grepl(gene, pattern = "^MTRN") &
      !grepl(gene, pattern = "^HLA-") &
      !grepl(gene, pattern = "orf") &
      !grepl(gene, pattern = "^AP0") &
      !grepl(gene, pattern = "^H1-")
  ) %>%
  dplyr::filter(!(gene %in% results$hgnc_symbol)) %>%
  # Signed fold change: rows with direction "Down" are mirrored to negative x.
  mutate(avg_log2FC2 = ifelse(direction == "Down", avg_log2FC * -1, avg_log2FC)) %>%
  mutate(abs_avg_log2FC2 = abs(avg_log2FC2)) %>%
  # Label genes with |log2FC| > 0.2 and adjusted p < 0.05.
  # The former nested fallback (p_val_adj < 10e-100) was a strict subset of
  # this condition and therefore could never be reached; it has been dropped.
  mutate(label = ifelse(abs_avg_log2FC2 > 0.2 & p_val_adj < 0.05,
                        gene, NA_character_)) %>%
  # Significant points take their dataset name as colour key, the rest "grey".
  mutate(color = ifelse(abs_avg_log2FC2 > 0.2 & p_val_adj < 0.05,
                        Dataset, "grey")) %>%
  # First three characters of the dataset name (e.g. "cd4" / "cd8").
  mutate(cd4_vs_cd8 = substr(Dataset, 1, 3))

In [None]:
# Volcano plots for the cd4_l3_treg dataset, one per entry of Comparison_list;
# each plot is shown and written to an SVG file.
options(repr.plot.width = 12, repr.plot.height = 6)

# The output folder is only created further down in the notebook, so make sure
# it exists before the first ggsave() call (no-op when it is already there).
dir.create("../figures/volcano", recursive = TRUE, showWarnings = FALSE)

for (j in Comparison_list) {
  df10 <- all_fcs5 %>%
    dplyr::filter(Comparison == j & (Dataset %in% c("cd4_l3_treg")))

  p <- df10 %>%
    ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj),
               color = color, label = label)) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2), dpi = 300) +
    theme_minimal() +
    # Red guides sit at the |log2FC| = 0.2 and adjusted p = 0.05 cut-offs.
    geom_vline(xintercept = c(-0.2, 0.2), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    geom_text_repel(size = 6, max.overlaps = 15) +
    # Values are named by the `color` key. Unnamed values are handed out in
    # level order, so a comparison without any significant gene (only the
    # "grey" level present) would have drawn every point in the Treg colour.
    scale_color_manual(values = c("cd4_l3_treg" = "#ff5599ff",
                                  "grey" = "grey88")) +
    # Same reasoning for shapes: keep 21 for significant, 22 for grey points.
    scale_shape_manual(values = c("cd4_l3_treg" = 21, "grey" = 22)) +
    ggtitle(j) +
    ggtheme()

  print(p)
  # Pass the plot explicitly instead of relying on ggplot2's last_plot().
  ggsave(paste0("../figures/volcano/treg_",j,".svg"),
         plot = p, width = 10, height = 6)
}

In [None]:
# Volcano plots for the cd8_l2_unc dataset, one per entry of Comparison_list;
# each plot is shown and written to an SVG file.

# The output folder is only created further down in the notebook, so make sure
# it exists before the first ggsave() call (no-op when it is already there).
dir.create("../figures/volcano", recursive = TRUE, showWarnings = FALSE)

for (j in Comparison_list) {
  df10 <- all_fcs5 %>%
    dplyr::filter(Comparison == j & (Dataset %in% c("cd8_l2_unc")))

  p <- df10 %>%
    ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj),
               color = color, label = label)) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2), dpi = 300) +
    theme_minimal() +
    # Red guides sit at the |log2FC| = 0.2 and adjusted p = 0.05 cut-offs.
    geom_vline(xintercept = c(-0.2, 0.2), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    geom_text_repel(size = 6) +
    # Values are named by the `color` key. Unnamed values are handed out in
    # level order, so a comparison without any significant gene (only the
    # "grey" level present) would have drawn every point in the dataset colour.
    scale_color_manual(values = c("cd8_l2_unc" = "#306aa3ff",
                                  "grey" = "grey88")) +
    # Same reasoning for shapes: keep 21 for significant, 22 for grey points.
    scale_shape_manual(values = c("cd8_l2_unc" = 21, "grey" = 22)) +
    ggtitle(j) +
    ggtheme()

  print(p)
  # Pass the plot explicitly instead of relying on ggplot2's last_plot().
  ggsave(paste0("../figures/volcano/unc_",j,".svg"),
         plot = p, width = 10, height = 6)
}

In [None]:
# Load the filtered CD4 L1 object and aggregate RNA expression per
# Patient_Time group, using only cells from experiments 16, 18, 19 and 20.
cd4 <- readRDS("../data/processed/L1/cd4_l1_full_filt.rds")

# `assays` is spelled out in full (the original `assay =` only worked through
# partial argument matching) and FALSE replaces the reassignable shorthand F.
# NOTE(review): with return.seurat = FALSE the result is not a Seurat object;
# confirm that the NormalizeData()/ScaleData() calls that follow accept it.
avgexp <- AggregateExpression(
  subset(cd4, Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")),
  return.seurat = FALSE,
  group.by = "Patient_Time",
  assays = "RNA"
)

In [None]:
# Normalise the aggregated expression, then scale it, overwriting `avgexp`.
avgexp <- avgexp %>%
  NormalizeData() %>%
  ScaleData()

In [None]:
# Create the output folder for the volcano figures. `recursive = TRUE` also
# creates a missing "../figures" parent, and `showWarnings = FALSE` keeps
# re-runs quiet when the folder already exists.
dir.create("../figures/volcano", recursive = TRUE, showWarnings = FALSE)

In [None]:
# Volcano plots for all remaining datasets (everything except the five listed
# below), faceted by the cd4_vs_cd8 column; one plot per entry of
# Comparison_list, shown and written to an SVG file.
options(repr.plot.width = 22, repr.plot.height = 8)

for (j in Comparison_list) {
  df10 <- all_fcs4 %>%
    dplyr::filter(Comparison == j &
                    !(Dataset %in% c("cd4_l1_full", "cd8_l2_subcluster",
                                     "cd4_l2_subcluster", "cd8_l1_full",
                                     "cd8_l2_nk")))

  p <- df10 %>%
    ggplot(aes(x = avg_log2FC2, y = -log10(p_val_adj),
               color = color, label = label)) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2)) +
    theme_minimal() +
    geom_text_repel(size = 6) +
    facet_wrap(~cd4_vs_cd8) +
    scale_color_manual(values = colors_of_populations) +
    # Red guides sit at the |log2FC| = 0.322 and adjusted p = 0.05 cut-offs.
    geom_vline(xintercept = c(-0.322, 0.322), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    scale_shape_manual(values = c(21:25, 21:25, 21:25, 21:25)) +
    ggtitle(j) +
    ggtheme()

  print(p)
  # Pass the plot explicitly instead of relying on ggplot2's last_plot().
  ggsave(paste0("../figures/volcano/all_populations_",j,".svg"),
         plot = p, width = 17, height = 6)
}