# Part 10: DEG analysis 

In this document, we will focus on the analysis of differentially expressed genes (DEGs). We have already obtained the DEGs in parts 6 and 7 of the analysis. Those results will be loaded and built upon.

In [None]:
source("diabetes_analysis_v07.R")

# Append a ranking `score` column to a marker table.
#
# score = -log(p_val_adj + 1e-310) * avg_log2FC * pct.1 / (pct.2 + 1e-300)
# The tiny offsets keep log() and the ratio finite when p_val_adj or pct.2
# is exactly 0.
#
# df: table with the columns p_val_adj, avg_log2FC, pct.1 and pct.2.
# Returns `df` with the extra column `score`.
rank_score_func <- function(df) {
  df %>%
    mutate(
      score = -1 * log(p_val_adj + (10^-310)) * avg_log2FC * (pct.1 / (pct.2 + 10^-300))
    )
}



We will be using some additional libraries:

In [None]:
library(msigdbr)
library(clusterProfiler)
library(org.Hs.eg.db)
library(enrichplot)
library(EnsDb.Hsapiens.v86)
library(fgsea)

# Load markers, add EnsID

We will now load the markers we calculated in parts 6 and 7 of the analysis pipeline. The precalculated gene tables can also be downloaded from Zenodo: [DOI: 10.5281/zenodo.14222418](https://zenodo.org/records/14222418)

In [None]:
markers_cd4  <- read_csv("../tables/de_genes/240319_cd4_all_markers_without_sex.csv")
markers_cd8  <- read_csv("../tables/de_genes/240319_cd8_all_markers_without_sex.csv")

As we have the sample information stored, we can bind all markers together.

In [None]:
all_markers  <- rbind(markers_cd4, markers_cd8)

In [None]:
all_markers$source  %>% table

We will now clean the metadata of the DEGs a bit and add the Up or Down label, which indicates the direction of the change. Up means a gene is expressed more in the worse condition, i.e. T1D compared to Healthy, T1D T0 compared to T1D T1, no partial remission at T1 compared to partial remission at T1, etc.

In [None]:
# Label every marker row with the direction of change.
# For each listed comparison, the cluster named first in `test_type` is "Up"
# and the other cluster is "Down". Rows from any other comparison get NA.
all_markers <- local({
  # test_type -> cluster that is labelled "Up"
  up_cluster <- c(
    "Dia T1 vs Ctrl T0" = "Dia T1",
    "Dia T0 vs Ctrl T0" = "Dia T0",
    "Dia T0 vs Dia T1" = "Dia T0",
    "PR_0 T0 vs PR_0 T1" = "PR_0 T0",
    "PR_0 T0 vs PR_1 T0" = "PR_0 T0",
    "PR_0 T1 vs PR_1 T1" = "PR_0 T1",
    "PR_1 T0 vs PR_1 T1" = "PR_1 T0",
    "Keto_1 T0 vs Keto_0 T0" = "Keto_1 T0"
  )
  # test_type -> cluster that is labelled "Down"
  down_cluster <- c(
    "Dia T1 vs Ctrl T0" = "Ctrl T0",
    "Dia T0 vs Ctrl T0" = "Ctrl T0",
    "Dia T0 vs Dia T1" = "Dia T1",
    "PR_0 T0 vs PR_0 T1" = "PR_0 T1",
    "PR_0 T0 vs PR_1 T0" = "PR_1 T0",
    "PR_0 T1 vs PR_1 T1" = "PR_1 T1",
    "PR_1 T0 vs PR_1 T1" = "PR_1 T1",
    "Keto_1 T0 vs Keto_0 T0" = "Keto_0 T0"
  )
  all_markers %>%
    mutate(
      direction = case_when(
        cluster == unname(up_cluster[as.character(test_type)]) ~ "Up",
        cluster == unname(down_cluster[as.character(test_type)]) ~ "Down"
      )
    )
})

In [None]:
all_markers  %>% group_by(cluster, test_type, direction)  %>% tally

Add Entrez gene IDs for each gene (looked up by gene symbol in the Ensembl annotation database), as they are needed for some analyses.

In [None]:
geneIDs1 <- ensembldb::select(EnsDb.Hsapiens.v86, keys= (all_markers$gene  %>% unique ), 
                              keytype = "SYMBOL", columns = c("SYMBOL","ENTREZID"))

In [None]:
# Rename by column name instead of by position, so the labels stay correct
# even if select() returns SYMBOL and ENTREZID in a different order. A missing
# column makes this assignment fail instead of silently mislabelling.
names(geneIDs1)[match(c("SYMBOL", "ENTREZID"), names(geneIDs1))] <- c("gene", "entrezid")

In [None]:
# Attach the Entrez ID to every marker row. The join key is spelled out so
# that a column name the two tables happen to share cannot silently become
# part of the key.
# NOTE(review): a symbol that maps to several Entrez IDs will duplicate its
# marker rows - confirm this is acceptable downstream.
all_markers2 <- all_markers %>%
  left_join(geneIDs1, by = "gene")

In [None]:
all_markers2

In [None]:
all_markers$gene_direction  <- paste(all_markers$gene, all_markers$direction)

In [None]:
all_markers2$gene_direction  <- paste(all_markers2$gene, all_markers2$direction)

# Counts of DE genes

### RNA

Let's visualize the counts of DEG in all clusters.

In [None]:
# Bar chart: number of RNA DE genes per dataset, stacked by comparison.
options(repr.plot.width = 10, repr.plot.height = 5)
ggplot(
  data = dplyr::filter(
    group_by(all_markers, cluster, source, test_type, dataset),
    source == "scRNAseq_RNA"
  ),
  mapping = aes(x = dataset)
) +
  geom_bar(mapping = aes(fill = test_type), position = "stack") +
  coord_flip()

Split up different comparisons:

In [None]:
# Bar chart: number of RNA DE genes per dataset, one panel per comparison,
# stacked by the cluster the gene is a marker of.
options(repr.plot.width = 20, repr.plot.height = 5)
ggplot(
  data = dplyr::filter(
    group_by(all_markers, cluster, source, test_type, dataset),
    source == "scRNAseq_RNA" & test_type != "Cpept_HI_T1 vs Cpept_LO_T1"
  ),
  mapping = aes(x = dataset)
) +
  facet_grid(cols = vars(test_type)) +
  geom_bar(mapping = aes(fill = cluster), position = "stack") +
  coord_flip()

Add a bit of formatting.

In [None]:
# Number of RNA DE genes per dataset, one panel per comparison, stacked by
# direction of change.
all_markers %>%
  # No group_by() here: it had no effect on the plot, and it made the
  # following mutate() rewrite a grouping column (`dataset`), which older
  # dplyr releases reject.
  dplyr::filter(source == "scRNAseq_RNA" & test_type != "Cpept_HI_T1 vs Cpept_LO_T1") %>%
  mutate(dataset = stringr::str_to_upper(gsub(dataset, pattern = "_", replacement = " "))) %>%
  ggplot(aes(x = dataset)) +
  facet_grid(cols = vars(test_type)) +
  geom_bar(aes(fill = direction), position = "stack") +
  coord_flip() +
  # Base theme first, as in the other figures of this notebook: theme_bw() is
  # a complete theme and would overwrite ggtheme() if it were added after it.
  theme_bw() +
  ggtheme() +
  theme(axis.ticks.y = element_blank()) +
  # Colours are bound to the labels by name rather than by level order.
  scale_fill_manual(values = c("Down" = "#90c2f1ff", "Up" = "#d0625dff"))


In [None]:
# Same chart as the formatted version, restricted further by dropping three
# of the partial-remission comparisons. This is the plot written to disk by
# the following ggsave() cell.
all_markers %>%
  # No group_by() here: it had no effect on the plot, and it made the
  # following mutate() rewrite a grouping column (`dataset`), which older
  # dplyr releases reject.
  dplyr::filter(
    source == "scRNAseq_RNA" & test_type != "Cpept_HI_T1 vs Cpept_LO_T1" &
      test_type != "PR_0 T0 vs PR_0 T1" & test_type != "PR_0 T1 vs PR_1 T1" &
      test_type != "PR_1 T0 vs PR_1 T1"
  ) %>%
  mutate(dataset = stringr::str_to_upper(gsub(dataset, pattern = "_", replacement = " "))) %>%
  ggplot(aes(x = dataset)) +
  facet_grid(cols = vars(test_type)) +
  geom_bar(aes(fill = direction), position = "stack") +
  coord_flip() +
  # Base theme first, as in the other figures of this notebook: theme_bw() is
  # a complete theme and would overwrite ggtheme() if it were added after it.
  theme_bw() +
  ggtheme() +
  theme(axis.ticks.y = element_blank()) +
  # Colours are bound to the labels by name rather than by level order.
  scale_fill_manual(values = c("Down" = "#90c2f1ff", "Up" = "#d0625dff"))


In [None]:
library(svglite)
# Make sure the output folder exists. recursive = TRUE also creates a missing
# "../figures" parent, and showWarnings = FALSE keeps re-runs quiet when the
# folder is already there.
dir.create("../figures/DE_genes/", showWarnings = FALSE, recursive = TRUE)
# No `plot` argument: ggsave() writes the most recently created ggplot.
ggsave(filename = "../figures/DE_genes/count_of_RNA_markers.svg", width = 11, height = 4)

### CollecTRI

We can do the same for the differentially active transcription factors estimated by the decoupleR package:

In [None]:
# Bar chart: number of differentially active CollecTRI regulators per dataset,
# stacked by comparison.
ggplot(
  data = dplyr::filter(
    group_by(all_markers, cluster, source, test_type, dataset),
    source == "scRNAseq_collecTRI"
  ),
  mapping = aes(x = dataset)
) +
  geom_bar(mapping = aes(fill = test_type), position = "stack") +
  coord_flip()

In [None]:
# Bar chart: number of differentially active CollecTRI regulators per dataset,
# one panel per comparison, stacked by cluster.
ggplot(
  data = dplyr::filter(
    group_by(all_markers, cluster, source, test_type, dataset),
    source == "scRNAseq_collecTRI"
  ),
  mapping = aes(x = dataset)
) +
  facet_grid(cols = vars(test_type)) +
  geom_bar(mapping = aes(fill = cluster), position = "stack") +
  coord_flip()

In [None]:
all_markers  %>% dplyr::filter(source == "scRNAseq_collecTRI")  %>% 
mutate(gene_direction = paste(gene, direction))  %>% 
group_by(gene, gene_direction)  %>% tally  %>% arrange(desc(n))

# Genes with similar DE in T0 and T1

In this part, we will look at the markers and check which of them are consistently up- or downregulated at both T0 and T1. 

In [None]:
i = 1

In [None]:
 # Shared markers of the first dataset: gene/direction pairs that occur more
 # than once among the "Dia T0 vs Ctrl T0" and "Dia T1 vs Ctrl T0" RNA markers.
mrk_clust <- all_markers %>%
  dplyr::filter(source == "scRNAseq_RNA") %>%
  mutate(gene_direction = paste(gene, direction)) %>%
  dplyr::filter(
    test_type %in% c("Dia T0 vs Ctrl T0", "Dia T1 vs Ctrl T0"),
    dataset == datasets[1]
  ) %>%
  group_by(gene_direction) %>%
  tally() %>%
  arrange(desc(n)) %>%
  dplyr::filter(n > 1)
# Remember which dataset these hits belong to.
mrk_clust$dataset <- datasets[1]

In [None]:
mrk_clust_all  <- mrk_clust

In [None]:
# Repeat the shared-marker search for every remaining dataset and append the
# hits to `mrk_clust_all`, which already holds the result for datasets[1].
# seq_along(datasets)[-1] is empty when there is only one dataset, whereas
# 2:length(datasets) would count down (2, 1) and process datasets[1] again.
# The per-dataset tables are collected first and bound once, instead of
# growing the table with rbind() on every iteration.
mrk_clust_all <- bind_rows(
  mrk_clust_all,
  lapply(seq_along(datasets)[-1], function(idx) {
    hits <- all_markers %>%
      dplyr::filter(source == "scRNAseq_RNA") %>%
      mutate(gene_direction = paste(gene, direction)) %>%
      dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Dia T1 vs Ctrl T0") &
                      dataset == datasets[idx]) %>%
      group_by(gene_direction) %>%
      tally() %>%
      arrange(desc(n)) %>%
      # keep gene/direction pairs seen in both comparisons
      dplyr::filter(n > 1)
    hits$dataset <- datasets[idx]
    hits
  })
)


In [None]:
mrk_clust_all  %>% nrow

In [None]:
mrk_clust_all  %>% group_by(gene_direction)  %>% 
tally  %>% arrange(desc(n))  %>% separate(gene_direction, into = c("gene", "direction"),
                                          sep = " ", remove = F)  %>% 
dplyr::filter(direction == "Down")

In [None]:
# Genes flagged "Down" at both time points in at least one dataset, ordered by
# the number of datasets in which that happens (most frequent first).
down_genes <- mrk_clust_all %>%
  group_by(gene_direction) %>%
  tally() %>%
  arrange(desc(n)) %>%
  # gene_direction is "<gene> <direction>"; FALSE is spelled out because `F`
  # is an ordinary variable that can be reassigned
  separate(gene_direction, into = c("gene", "direction"), sep = " ", remove = FALSE) %>%
  dplyr::filter(direction == "Down") %>%
  pull(gene)

In [None]:
down_genes

In [None]:
# Genes flagged "Up" at both time points in at least one dataset, ordered by
# the number of datasets in which that happens (most frequent first).
up_genesd <- mrk_clust_all %>%
  group_by(gene_direction) %>%
  tally() %>%
  arrange(desc(n)) %>%
  # gene_direction is "<gene> <direction>"; FALSE is spelled out because `F`
  # is an ordinary variable that can be reassigned
  separate(gene_direction, into = c("gene", "direction"), sep = " ", remove = FALSE) %>%
  dplyr::filter(direction == "Up") %>%
  pull(gene)

In [None]:
up_genesd

In [None]:
all_markers$dataset  %>% table

In [None]:
all_markers

In [None]:
all_markers  %>% dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Dia T1 vs Ctrl T0") & 
                               source == "scRNAseq_RNA")  %>% 
dplyr::select(p_val_adj, gene, dataset, test_type)  %>% 
pivot_wider(names_from = "test_type", values_from = p_val_adj)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
all_markers  %>% dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Dia T1 vs Ctrl T0") & 
                               source == "scRNAseq_RNA" & !(grepl(gene, pattern = "MTRN")))  %>% 
dplyr::select(avg_log2FC, gene, dataset, test_type)  %>% 
pivot_wider(names_from = "test_type", values_from = avg_log2FC)  %>% 
ggplot(aes(x = `Dia T0 vs Ctrl T0`, y = `Dia T1 vs Ctrl T0`, label = gene, fill = dataset))  +
geom_point() + geom_label()

In [None]:
# Dot plot of the shared T0/T1 markers: genes on the x-axis, datasets on the
# y-axis, one row of panels per comparison and one column per shared direction.
# Dot size is -log10(adjusted p-value), colour is the direction of change.
options(repr.plot.height = 7, repr.plot.width = 25)
all_markers %>%
  mutate(
    is_shared = gene %in% up_genesd | gene %in% down_genes,
    direction_t0_t1 = ifelse(gene %in% up_genesd, "UP",
                             ifelse(gene %in% down_genes, "DOWN", NA_character_))
  ) %>%
  # TRUE instead of the reassignable shorthand `T`
  dplyr::filter(source == "scRNAseq_RNA" & is_shared == TRUE,
                test_type %in% c("Dia T0 vs Ctrl T0", "Dia T1 vs Ctrl T0")) %>%
  mutate(cluster_source = paste(cluster, source)) %>%
  ggplot(aes(factor(gene), dataset)) +
  facet_grid(cols = vars(direction_t0_t1), rows = vars(test_type), space = "free", scales = "free") +
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = direction)) +
  theme_bw() +
  scale_size_continuous(range = c(1, 4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
paths_fc  <- list.files("../tables/fold_change", full.names = T)
dataset_names  <- list.files("../tables/fold_change", full.names = F)


In [None]:
paths_fc

In [None]:
paths_fc  <- paths_fc[which(grepl(paths_fc, pattern = "Ctrl"))]

In [None]:
dataset_names  <- dataset_names[which(grepl(dataset_names, pattern = "Ctrl"))]

In [None]:
dataset_names

In [None]:
paths_fc

In [None]:
paths_fc  %>% length

In [None]:
# Read the j-th fold-change table of a "Ctrl vs Dia T0" comparison.
#
# j: index into the global vectors `paths_fc` (file paths) and
#    `dataset_names` (matching file names).
# Returns a data.frame sorted by decreasing avg_log2FC that keeps only rows
# whose `cluster` contains "Dia", with two added columns:
#   dataset    - file name without ".csv" and without "_Ctrl_vs_DiaT0"
#   comaprison - constant "DiaT0_vs_Ctrl" (the column name is spelled this
#                way on purpose: later cells refer to it)
all_fcs1 <- function(j) {
  print(j)
  fc.df <- read_csv(paths_fc[j]) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster, pattern = "Dia"))
  # fixed = TRUE strips the literal suffixes; read as a regular expression,
  # the "." in ".csv" would match any character.
  dataset_label <- gsub(dataset_names[j], pattern = ".csv", replacement = "", fixed = TRUE)
  fc.df$dataset <- gsub(dataset_label, pattern = "_Ctrl_vs_DiaT0", replacement = "", fixed = TRUE)
  fc.df$comaprison <- "DiaT0_vs_Ctrl"
  fc.df
}

In [None]:
# Pick the "Ctrl vs Dia T0" files by name instead of assuming that they sit at
# the odd positions 1, 3, ..., 41 of a list of exactly 42 files.
all_fcs1_result <- map(grep("Ctrl_vs_DiaT0", paths_fc, fixed = TRUE), all_fcs1)

In [None]:
all_fcs_result  <- bind_rows(all_fcs1_result)

In [None]:
all_fcs_result$dataset  %>% table

In [None]:
# Read the j-th fold-change table of a "Ctrl vs Dia T1" comparison.
#
# j: index into the global vectors `paths_fc` (file paths) and
#    `dataset_names` (matching file names).
# Returns a data.frame sorted by decreasing avg_log2FC that keeps only rows
# whose `cluster` contains "Dia", with two added columns:
#   dataset    - file name without ".csv" and without "_Ctrl_vs_DiaT1"
#   comaprison - constant "DiaT1_vs_Ctrl" (the column name is spelled this
#                way on purpose: later cells refer to it)
all_fcs2 <- function(j) {
  print(j)
  fc.df <- read_csv(paths_fc[j]) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster, pattern = "Dia"))
  # fixed = TRUE strips the literal suffixes; read as a regular expression,
  # the "." in ".csv" would match any character.
  dataset_label <- gsub(dataset_names[j], pattern = ".csv", replacement = "", fixed = TRUE)
  fc.df$dataset <- gsub(dataset_label, pattern = "_Ctrl_vs_DiaT1", replacement = "", fixed = TRUE)
  fc.df$comaprison <- "DiaT1_vs_Ctrl"
  fc.df
}

In [None]:
# Pick the "Ctrl vs Dia T1" files by name instead of assuming that they sit at
# the even positions 2, 4, ..., 42 of a list of exactly 42 files.
all_fcs2_result <- map(grep("Ctrl_vs_DiaT1", paths_fc, fixed = TRUE), all_fcs2)

In [None]:
all_fcs2_result  <- bind_rows(all_fcs2_result)

In [None]:
all_fcs_result  <- rbind(all_fcs_result, all_fcs2_result)

In [None]:
all_fcs_result

In [None]:
all_fcs_result$dataset  %>% table

In [None]:
options(repr.plot.height = 7, repr.plot.width = 25)
all_fcs_result  %>% 
mutate(is_shared  = gene %in% up_genesd | gene %in% down_genes,
       direction_t0_t1 = ifelse(gene %in% up_genesd, "UP", ifelse(gene %in% down_genes, "DOWN", NA_character_)))  %>% 
dplyr::filter(is_shared == TRUE)  %>% 
ggplot(aes(factor(gene), dataset)) + 
facet_grid(cols = vars(direction_t0_t1), rows = vars(comaprison), space = "free", scales = "free") +
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = avg_log2FC, alpha = p_val_adj < 0.05)) + 
  theme_bw()  + scale_size_continuous(range=c(1,4)) +
scale_alpha_manual(values = c(0.4,1)) +
 scale_colour_gradient2(low = "blue", mid = "white", high = "red") + 
 theme(axis.text.x = element_text(angle = 90))

In [None]:
# Dot plot of the shared T0/T1 markers taken from the marker tables: genes on
# the x-axis, datasets on the y-axis, one row of panels per comparison and one
# column per shared direction. Dot size is -log10(adjusted p-value), colour is
# the direction of change.
options(repr.plot.height = 7, repr.plot.width = 25)
all_markers %>%
  mutate(
    is_shared = gene %in% up_genesd | gene %in% down_genes,
    direction_t0_t1 = ifelse(gene %in% up_genesd, "UP",
                             ifelse(gene %in% down_genes, "DOWN", NA_character_))
  ) %>%
  # TRUE instead of the reassignable shorthand `T`
  dplyr::filter(source == "scRNAseq_RNA" & is_shared == TRUE,
                test_type %in% c("Dia T0 vs Ctrl T0", "Dia T1 vs Ctrl T0")) %>%
  mutate(cluster_source = paste(cluster, source)) %>%
  ggplot(aes(factor(gene), dataset)) +
  facet_grid(cols = vars(direction_t0_t1), rows = vars(test_type), space = "free", scales = "free") +
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = direction)) +
  theme_bw() +
  scale_size_continuous(range = c(1, 4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
all_markers  %>% dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Dia T1 vs Ctrl T0") & 
                               source == "scRNAseq_RNA" & !(grepl(gene, pattern = "MTRN")))  %>% 
dplyr::select(avg_log2FC, gene, dataset, test_type)  %>% 
pivot_wider(names_from = "test_type", values_from = avg_log2FC)  %>% 
ggplot(aes(x = `Dia T0 vs Ctrl T0`, y = `Dia T1 vs Ctrl T0`, label = gene, fill = dataset))  +
geom_point() 

In [None]:
all_fcs_result

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
all_fcs_result  %>% 
dplyr::filter(p_val_adj < 0.05)  %>% 
dplyr::filter(!(grepl(gene, pattern = "MTRN")))  %>% 
mutate(gene_in_genes = ifelse(gene %in% c("GZMK","GZMA","TNF","CX3CR1","GZMB"), gene, NA_character_))  %>% 
dplyr::select(avg_log2FC, gene, gene_in_genes, dataset, comaprison)  %>% 
pivot_wider(names_from = "comaprison", values_from = avg_log2FC)  %>% 
ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset))  +
geom_point() + geom_label()

In [None]:
# Named colour palette: one hex colour (with alpha) per population / dataset
# name, plus a "grey" entry.
colors_of_populations <- c(
  # CD4 datasets
  "cd4_l1_full"          = "#ffa66aff",
  "cd4_l2_subcluster"    = "#ffa66aff",
  "cd4_l2_unc"           = "#cd3333ff",
  "cd4_l3_isaghi"        = "#a46464ff",
  "cd4_l3_naive"         = "#a58413ff",
  "cd4_l3_nfkb"          = "#a46c43ff",
  "cd4_l3_proliferating" = "#9b3097ff",
  "cd4_l3_temra"         = "#7d252aff",
  "cd4_l3_tfh"           = "#8e6a1dff",
  "cd4_l3_th1th17"       = "#a94e4eff",
  "cd4_l3_th2"           = "#b36672ff",
  "cd4_l3_treg"          = "#ba6d8dff",
  # CD8 datasets
  "cd8_l1_full"          = "#67934aff",
  "cd8_l2_nk"            = "#105292ff",
  "cd8_l2_subcluster"    = "#67934aff",
  "cd8_l2_unc"           = "#6d00c0ff",
  "cd8_l3_naive"         = "#004455ff",
  "cd8_l3_prolif"        = "#5f711aff",
  "cd8_l3_tcm"           = "#427c39ff",
  "cd8_l3_tem"           = "#004455ff",
  "cd8_l3_temra"         = "#16481fff",
  # fallback
  "grey"                 = "grey88"
)

In [None]:
# Scatter plot comparing the log2 fold changes of the two comparisons
# (Dia T0 vs Ctrl on x, Dia T1 vs Ctrl on y) for the two L1 datasets.
# Only significant rows (p_val_adj < 0.05) of marker genes are used; genes
# that pass the fold-change cut-offs in both comparisons are labelled.
options(repr.plot.height = 12, repr.plot.width = 14)

all_fcs_result %>%
  dplyr::filter(
    gene %in% all_markers$gene,
    p_val_adj < 0.05,
    !grepl(gene, pattern = "MTRN"),
    dataset %in% c("cd4_l1_full_filt", "cd8_l1_full_filt")
  ) %>%
  dplyr::select(avg_log2FC, gene, dataset, comaprison) %>%
  # one column of fold changes per comparison
  pivot_wider(names_from = "comaprison", values_from = avg_log2FC) %>%
  mutate(
    gene_in_genes = ifelse(
      (DiaT0_vs_Ctrl > 0.15 & DiaT1_vs_Ctrl > 0.15) |
        (DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2),
      gene,
      NA_character_
    )
  ) %>%
  ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset)) +
  scale_color_manual(values = c("#ffa66aff", "#67934aff")) +
  geom_point(aes(shape = dataset)) +
  scale_shape_manual(values = c(21, 22)) +
  ggrepel::geom_text_repel() +
  theme_bw() +
  ggtheme()
# Written from the plot created just above.
ggsave(filename = "../figures/DE_genes/corr_DE_t0_t1.svg", width = 14, height = 12)

In [None]:
all_fcs_result  %>% 
dplyr::filter(gene %in% all_markers$gene)  %>% 
dplyr::filter(p_val_adj < 0.05)  %>% 
dplyr::filter(!(grepl(gene, pattern = "MTRN")) & 
              dataset %in% c("cd4_l1_full_filt"))  %>% 
dplyr::select(avg_log2FC, gene, dataset, comaprison)  %>% 
pivot_wider(names_from = "comaprison", values_from = avg_log2FC)  %>% 
mutate(gene_in_genes = ifelse(DiaT0_vs_Ctrl > 0.10 & DiaT1_vs_Ctrl > 0.10 |
                              DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2 |
                              DiaT0_vs_Ctrl < -0.1 & DiaT1_vs_Ctrl > 0.1 |
                              DiaT0_vs_Ctrl > 0.1 & DiaT1_vs_Ctrl < -0.1 |
                              DiaT1_vs_Ctrl < -0.5 |
                              DiaT1_vs_Ctrl > 0.10, 
                              gene, NA_character_))  %>% 
ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset))  +
scale_color_manual(values = c("#d45500ff"))  +
geom_point(aes(shape = dataset)) + 
scale_shape_manual(values = c(21,22)) +
ggrepel::geom_text_repel() +
theme_bw() +
ggtheme()
ggsave(filename = "../figures/DE_genes/corr_DE_t0_t1_cd4.svg", width = 14, height = 12)

In [None]:
all_fcs_result  %>% 
dplyr::filter(gene %in% all_markers$gene)  %>% 
dplyr::filter(p_val_adj < 0.05)  %>% 
dplyr::filter(!(grepl(gene, pattern = "MTRN")) & 
              dataset %in% c("cd8_l1_full_filt"))  %>% 
dplyr::select(avg_log2FC, gene, dataset, comaprison)  %>% 
pivot_wider(names_from = "comaprison", values_from = avg_log2FC)  %>% 
mutate(gene_in_genes = ifelse(DiaT0_vs_Ctrl > 0.20 & DiaT1_vs_Ctrl > 0.20 |
                              DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2 |
                              DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl > 0.2 |
                              DiaT0_vs_Ctrl > 0.1 & DiaT1_vs_Ctrl < -0.1 |
                              DiaT1_vs_Ctrl < -0.25 |
                             DiaT0_vs_Ctrl < -0.3 |
                              DiaT1_vs_Ctrl < -0.1, 
                              gene, NA_character_))  %>% 
ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset))  +
scale_color_manual(values = c("#67934aff"))  +
geom_point(aes(shape = dataset)) + 
scale_shape_manual(values = c(21,22)) +
ggrepel::geom_text_repel() +
theme_bw() +
ggtheme()
ggsave(filename = "../figures/DE_genes/corr_DE_t0_t1_cd8.svg", width = 14, height = 12)

In [None]:
# Named colour palette: one hex colour (with alpha) per population / dataset
# name, plus a "grey" entry. Used below as the manual colour scale of the
# per-population plots.
# NOTE(review): this appears to repeat an earlier definition of the same
# palette in this notebook - confirm and keep only one copy.
colors_of_populations = c(
    'cd4_l1_full' = "#ffa66aff",
    'cd4_l2_subcluster'  = "#ffa66aff",
    'cd4_l2_unc'  = "#cd3333ff",
    'cd4_l3_isaghi'  = "#a46464ff",
    'cd4_l3_naive'  = "#a58413ff",
    'cd4_l3_nfkb'  = "#a46c43ff",
    'cd4_l3_proliferating'  = "#9b3097ff", 
    'cd4_l3_temra'  = "#7d252aff",
    'cd4_l3_tfh'  = "#8e6a1dff",
    'cd4_l3_th1th17'  = "#a94e4eff",
    'cd4_l3_th2'  = "#b36672ff",
    'cd4_l3_treg'  = "#ba6d8dff",
    'cd8_l1_full'  = "#67934aff",
    'cd8_l2_nk'  = "#105292ff",
    'cd8_l2_subcluster' = "#67934aff", 
    'cd8_l2_unc'  = "#6d00c0ff",
    'cd8_l3_naive'  = "#004455ff",
    'cd8_l3_prolif'  = "#5f711aff",
    'cd8_l3_tcm'  = "#427c39ff",
    'cd8_l3_tem'  = "#004455ff",
    'cd8_l3_temra'  = "#16481fff",
    "grey" = "grey88"
)

In [None]:
all_fcs_result  %>% 
dplyr::filter(gene %in% all_markers$gene)  %>% 
dplyr::filter(p_val_adj < 0.05)  %>% 
dplyr::filter(!(grepl(gene, pattern = "MTRN")) & 
              grepl(dataset, pattern = "cd4_l3"))  %>% 
dplyr::select(avg_log2FC, gene, dataset, comaprison)  %>% 
pivot_wider(names_from = "comaprison", values_from = avg_log2FC)  %>% 
mutate(gene_in_genes = ifelse(DiaT0_vs_Ctrl > 0.15 & DiaT1_vs_Ctrl > 0.15 |
                              DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2, 
                              gene, NA_character_))  %>% 
ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset))  +
scale_color_manual(values = colors_of_populations)  +
geom_point(aes(shape = dataset)) + 
scale_shape_manual(values = c(21:25,21:25)) +
ggrepel::geom_text_repel() +
theme_bw() +
ggtheme()
ggsave(filename = "../figures/DE_genes/corr_DE_cd4_pop_t0_t1.svg", width = 14, height = 12)

In [None]:
all_fcs_result  %>% 
dplyr::filter(gene %in% all_markers$gene)  %>% 
dplyr::filter(p_val_adj < 0.05)  %>% 
dplyr::filter(!(grepl(gene, pattern = "MTRN")) & 
              grepl(dataset, pattern = "cd8_l3"))  %>% 
dplyr::select(avg_log2FC, gene, dataset, comaprison)  %>% 
pivot_wider(names_from = "comaprison", values_from = avg_log2FC)  %>% 
mutate(gene_in_genes = ifelse(DiaT0_vs_Ctrl > 0.15 & DiaT1_vs_Ctrl > 0.15 |
                              DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2, 
                              gene, NA_character_))  %>% 
ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset))  +
scale_color_manual(values = colors_of_populations)  +
geom_point(aes(shape = dataset)) + 
scale_shape_manual(values = c(21:25,21:25)) +
ggrepel::geom_text_repel() +
theme_bw() +
ggtheme()
ggsave(filename = "../figures/DE_genes/corr_DE_cd8_pop_t0_t1.svg", width = 14, height = 12)

In [None]:
options(repr.plot.height = 10, repr.plot.width = 13)

all_fcs_result  %>% 
dplyr::filter(gene %in% all_markers$gene)  %>% 
dplyr::filter(p_val_adj < 0.05)  %>% 
dplyr::filter(!(grepl(gene, pattern = "MTRN")) & 
              dataset %in% c("cd4_l1_full_filt", "cd8_l1_full_filt"))  %>% 
dplyr::select(avg_log2FC, gene, dataset, comaprison)  %>% 
pivot_wider(names_from = "comaprison", values_from = avg_log2FC)  %>% 
mutate(gene_in_genes = ifelse(DiaT0_vs_Ctrl > 0.15 & DiaT1_vs_Ctrl > 0.15 |
                              DiaT0_vs_Ctrl > 0.15 & DiaT1_vs_Ctrl < -0.2 |
                              DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl > 0.15 |
                              DiaT0_vs_Ctrl < -0.2 & DiaT1_vs_Ctrl < -0.2, 
                              gene, NA_character_))  %>% 
ggplot(aes(x = DiaT0_vs_Ctrl, y = DiaT1_vs_Ctrl, label = gene_in_genes, color = dataset))  +
scale_color_manual(values = c("#ffa66aff","#67934aff"))  +
geom_point(aes(shape = dataset)) + 
scale_shape_manual(values = c(21,22)) +
ggrepel::geom_text_repel() +
theme_bw() +
ggtheme()

# Heatmap all comparisons

In [None]:
# Genes shown in the heatmap, in display order. The list is written out in its
# final order (no rev() calls), one group of genes per line.
hm <- c(
  "LEF1", "BACH2", "NELL2", "TCF7", "CXCR4", "ZFP36L2", "IL7R", "KLF2", "CCR7", "SELL",
  "IFNG", "TBX21", "TNF", "CCL5", "CX3CR1", "GZMA", "GNLY", "CST7", "GZMB", "NKG7", "PRF1", "TNFRSF9",
  "TNFAIP3", "DUSP1", "TSC22D3", "NFKBIA", "DDIT4", "INPP4B",
  "IFIT3", "IFI6", "STAT1", "MX1", "ISG15", "IFI44L", "OASL",
  "BTN3A2", "BTN3A3"
)

In [None]:
# Read the j-th fold-change table and keep only the heatmap genes for the
# cluster of interest.
#
# j: index into the global vectors `paths_fc` (file paths) and
#    `dataset_names` (matching file names).
# The cluster whose rows are kept depends on the comparison in the file path:
#   path contains "Ctrl"     -> clusters matching "Dia"
#   path contains "PR_0_T0"  -> clusters matching "PR_0 T0"
#   path contains "keto"     -> clusters matching "Keto_1 T0"
#   anything else            -> clusters matching "Dia T0"
# Returns a data.frame sorted by decreasing avg_log2FC, restricted to genes in
# the global vector `hm`, with a `dataset` column holding the file name
# without ".csv".
get_fc_genes <- function(j) {
  print(j)
  path_j <- paths_fc[j]

  # One decision up front instead of three copies of the same pipeline.
  cluster_pattern <- if (grepl(path_j, pattern = "Ctrl")) {
    "Dia"
  } else if (grepl(path_j, pattern = "PR_0_T0")) {
    "PR_0 T0"
  } else if (grepl(path_j, pattern = "keto")) {
    "Keto_1 T0"
  } else {
    "Dia T0"
  }

  fc.df <- read_csv(path_j) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster, pattern = cluster_pattern)) %>%
    dplyr::filter(gene %in% hm)

  # fixed = TRUE strips the literal ".csv"; read as a regular expression, the
  # "." would match any character.
  fc.df$dataset <- gsub(dataset_names[j], pattern = ".csv", replacement = "", fixed = TRUE)
  fc.df
}

In [None]:
# List the fold-change tables (full paths and bare file names, same order).
paths_fc <- list.files("../../240617_VN_Diabetes_V06/tables/fold_change/", full.names = T)
dataset_names <- list.files("../../240617_VN_Diabetes_V06/tables/fold_change", full.names = F)

# Step 1: keep the L3 populations plus the three listed L2 datasets.
paths_fc <- grep("l3|cd8_l2_unc|cd4_l2_unc|cd8_l2_nk", paths_fc, value = TRUE)
dataset_names <- grep("l3|cd8_l2_unc|cd4_l2_unc|cd8_l2_nk", dataset_names, value = TRUE)

# Step 2: keep only the comparisons that are shown in the heatmap.
paths_fc <- grep(
  "Ctrl_vs_DiaT0|DiaT0_vs_DiaT1|Ctrl_vs_DiaT1|PR_0_T0_vs_PR_1_T0|ketoacidosis",
  paths_fc,
  value = TRUE
)
dataset_names <- grep(
  "Ctrl_vs_DiaT0|DiaT0_vs_DiaT1|Ctrl_vs_DiaT1|PR_0_T0_vs_PR_1_T0|ketoacidosis",
  dataset_names,
  value = TRUE
)


In [None]:
# seq_along() yields no indices when `paths_fc` is empty, whereas
# 1:length(paths_fc) would iterate over 1 and 0.
fcs <- purrr::map(seq_along(paths_fc), get_fc_genes)

In [None]:
fcs  <- bind_rows(fcs)

In [None]:
# population: the first 11 characters of the dataset name.
fcs <- fcs %>% mutate(population = substr(dataset, 1, 11))

table(fcs$population)

# population2: as population, but cut to 9 characters when it contains "_nk_".
fcs <- fcs %>%
  mutate(
    population2 = ifelse(
      grepl(population, pattern = "_nk_"),
      substr(population, 1, 9),
      population
    )
  )

fcs$population2  %>% table

# test_type: the last 13 characters of the dataset name (positions n2..n1).
fcs <- fcs %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )


fcs$test_type  %>% table

In [None]:
# Reduced copy of `fcs` with the columns andjus_logFC, gene and dataset.
# NOTE(review): both branches of ifelse() return avg_log2FC, so the
# p_val_adj < 0.05 test does not mask anything; its only effect is that rows
# with a missing p_val_adj become NA. Confirm what non-significant rows were
# meant to be set to.
fcs2  <- fcs  %>% mutate(andjus_logFC = ifelse(p_val_adj<0.05,avg_log2FC,avg_log2FC))  %>% 
dplyr::select(andjus_logFC, gene, dataset)

In [None]:
# Dot "heatmap" of the selected genes: populations on the x-axis, genes on the
# y-axis (in reversed `hm` order), one panel per test_type.
# Dot size is -log(p_val_adj) (natural log), colour is avg_log2FC.
options(repr.plot.height = 8, repr.plot.width = 24)
fcs  %>% 
# The explicit levels fix the left-to-right order of the populations; a
# population2 value that is not listed here becomes NA.
ggplot(aes(x = factor(population2, levels = c(
'cd8_l3_prol',
'cd8_l3_tcm_',
'cd4_l3_th1t',
'cd4_l3_temr',
'cd8_l3_temr',
'cd4_l3_tfh_',
'cd8_l3_naiv',
'cd4_l3_naiv',
'cd4_l3_th2_',
'cd4_l3_nfkb',
'cd4_l2_unc_',
'cd4_l3_treg',
'cd4_l3_isag',
'cd4_l3_prol',
'cd8_l3_tem_',
'cd8_l2_nk',
'cd8_l2_unc_'    
))          
           , y = factor(gene, levels = rev(hm)))) +
geom_point(aes(size = -log(p_val_adj), color = avg_log2FC)) +
# NOTE(review): no alpha aesthetic is mapped in this plot, so this alpha scale
# has nothing to act on - confirm whether alpha = p_val_adj < 0.05 was intended.
scale_alpha_manual(values = c(0.5,1)) +
scale_size_continuous(range = c(3,8))+
ylab("") + xlab("") +
facet_grid(cols = vars(test_type), space = "free", scales = "free") +
scale_color_gradientn(colours = c("blue", "blue","white", "red","red")) + 
theme_classic() + theme(axis.text.x = element_text(angle = 90)) + ggtheme()

In [None]:
ggsave("../figures/DE_genes//heatmap_deg.svg", width = 45, height = 25, units = "cm", create.dir = TRUE)

# Fold changes between comparisons

## FoldChanges by Seurat

In [None]:
# Remove every object from the current workspace before the large Seurat
# objects are loaded. Attached packages stay loaded.
# NOTE(review): this also deletes anything the source()d script at the top of
# the notebook defined as an object (helper functions, vectors); re-source it
# if those are needed again below.
rm(list = ls())

In [None]:
cd8_l3_tem  <- readRDS("../data/processed/L3/cd8_l3_tem.rds")
cd8_l3_naive  <- readRDS("../data/processed/L3/cd8_l3_naive.rds")
cd8_l3_tcm  <- readRDS("../data/processed/L3/cd8_l3_tcm.rds")
cd8_l3_temra  <- readRDS("../data/processed/L3/cd8_l3_temra.rds")
cd8_l3_prolif  <- readRDS("../data/processed/L3/cd8_l3_prolif.rds")
cd8_l2_subcluster  <- readRDS("../data/processed/L2/cd8_l2_subcluster.rds")
cd8_l2_nk  <- readRDS("../data/processed/L2/cd8_l2_nk.rds")
cd8_l2_unc  <- readRDS("../data/processed/L2/cd8_l2_unc.rds")
cd8_l1_full_filt  <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")
cd4_l3_naive  <- readRDS("../data/processed/L3/cd4_l3_naive.rds")
cd4_l3_tfh  <- readRDS("../data/processed/L3/cd4_l3_tfh.rds")
cd4_l3_th1th17  <- readRDS("../data/processed/L3/cd4_l3_th1_17.rds")
cd4_l3_nfkb  <- readRDS("../data/processed/L3/cd4_l3_nfkb.rds")
cd4_l3_th2  <- readRDS("../data/processed/L3/cd4_l3_th2.rds")
cd4_l3_treg  <- readRDS("../data/processed/L3/cd4_l3_treg.rds")
cd4_l3_isaghi  <- readRDS("../data/processed/L3/cd4_l3_isaghi.rds")
cd4_l3_proliferating  <- readRDS("../data/processed/L3/cd4_l3_proliferating.rds")
cd4_l3_temra  <- readRDS("../data/processed/L3/cd4_l3_temra.rds")
cd4_l2_unc  <- readRDS("../data/processed/L2/cd4_l2_unc.rds")

In [None]:
cd4_l2_subcluster  <- readRDS("../data/processed/L2/cd4_subcluster.rds")

In [None]:
cd4_l1_full_filt <- readRDS("../data/processed/L1/cd4_l1_full_filt.rds")
# cd8_l2_subcluster is not read again here: the same file
# ("../data/processed/L2/cd8_l2_subcluster.rds") is already loaded together
# with the other CD8 objects, so a second readRDS() only cost time.

In [None]:
# All Seurat objects in one named list. Each element is named after its
# variable, so object and name cannot drift apart.
dataset_list <- list(
  cd8_l3_tem           = cd8_l3_tem,
  cd8_l3_naive         = cd8_l3_naive,
  cd8_l3_tcm           = cd8_l3_tcm,
  cd8_l3_temra         = cd8_l3_temra,
  cd8_l3_prolif        = cd8_l3_prolif,
  cd8_l2_nk            = cd8_l2_nk,
  cd8_l2_unc           = cd8_l2_unc,
  cd8_l1_full_filt     = cd8_l1_full_filt,
  cd4_l3_naive         = cd4_l3_naive,
  cd4_l3_tfh           = cd4_l3_tfh,
  cd4_l3_th1th17       = cd4_l3_th1th17,
  cd4_l3_nfkb          = cd4_l3_nfkb,
  cd4_l3_th2           = cd4_l3_th2,
  cd4_l3_treg          = cd4_l3_treg,
  cd4_l3_isaghi        = cd4_l3_isaghi,
  cd4_l3_proliferating = cd4_l3_proliferating,
  cd4_l3_temra         = cd4_l3_temra,
  cd4_l2_unc           = cd4_l2_unc,
  cd4_l2_subcluster    = cd4_l2_subcluster,
  cd4_l1_full_filt     = cd4_l1_full_filt,
  cd8_l2_subcluster    = cd8_l2_subcluster
)

In [None]:
options(future.globals.maxSize = 2000 * 1024^2)

In [None]:
# Create the output folder for the fold-change tables. recursive = TRUE also
# creates a missing "../tables" parent, and showWarnings = FALSE keeps re-runs
# quiet when the folder already exists.
dir.create("../tables/fold_change/", showWarnings = FALSE, recursive = TRUE)

In [None]:
#plan("multisession", workers = 4)

# For every dataset: compare the `Disease` groups on the RNA assay with a
# Wilcoxon test and write the complete result table to
# ../tables/fold_change/<dataset name>.csv.
# seq_along() is safe for an empty list, where 1:length() would iterate over
# 1 and 0.
for (i in seq_along(dataset_list)) {
  seurat_object <- dataset_list[[i]]
  DefaultAssay(seurat_object) <- "RNA"
  Idents(seurat_object) <- seurat_object$Disease

  # All filters are switched off (-Inf thresholds, return.thresh = 1,
  # only.pos = FALSE).
  # NOTE(review): presumably so that every gene gets a fold change - confirm.
  fc.seurat_object <- FindAllMarkers(
    seurat_object,
    test.use = "wilcox",
    logfc.threshold = -Inf, min.pct = -Inf, min.diff.pct = -Inf,
    only.pos = FALSE,
    min.cells.feature = 1, return.thresh = 1
  )

  write.csv(fc.seurat_object, paste0("../tables/fold_change/", names(dataset_list)[i], ".csv"))
}

In [None]:
cd8_l2_subcluster$Condition2  %>% table

In [None]:
#plan("multisession", workers = 8)

# Pairwise Wilcoxon comparisons run on every dataset. Each entry names the
# metadata column that defines the groups, the two groups to keep, and the
# suffix of the output file in ../tables/fold_change/.
wilcox_condition_tests <- list(
  list(column = "Condition", groups = c("Ctrl T0", "Dia T0"),
       suffix = "_Ctrl_vs_DiaT0"),
  list(column = "Condition", groups = c("Ctrl T0", "Dia T1"),
       suffix = "_Ctrl_vs_DiaT1"),
  list(column = "Condition", groups = c("Dia T0", "Dia T1"),
       suffix = "_DiaT0_vs_DiaT1"),
  list(column = "Condition2",
       groups = c("Part_remission_0", "Part_remission_1"),
       suffix = "_remission")
)

for (i in seq_along(dataset_list)) {
  full_object <- dataset_list[[i]]

  for (test in wilcox_condition_tests) {
    # Keep only the cells belonging to one of the two compared groups.
    in_groups <- full_object@meta.data[[test$column]] %in% test$groups
    seurat_object <- subset(full_object, cells = colnames(full_object)[in_groups])
    DefaultAssay(seurat_object) <- "RNA"
    Idents(seurat_object) <- seurat_object@meta.data[[test$column]]

    # All filters are switched off so that every gene is reported.
    fc.seurat_object <- FindAllMarkers(
      seurat_object,
      test.use = "wilcox",
      logfc.threshold = -Inf, min.pct = -Inf, min.diff.pct = -Inf,
      only.pos = FALSE,
      min.cells.feature = 1, return.thresh = 1
    )

    write.csv(
      fc.seurat_object,
      paste0("../tables/fold_change/", names(dataset_list)[i], test$suffix, ".csv")
    )
  }
}

In [None]:
# One row per patient with the partial-remission label ("PR_<value>"),
# taken from the Dia T0 cells of the full CD4 object.
md_for_remission <- cd4_l1_full_filt@meta.data %>%
  dplyr::filter(Condition %in% c("Dia T0")) %>%
  dplyr::distinct(Patient_ID, part_remission_y_n) %>%
  dplyr::mutate(Condition2 = paste0("PR_", part_remission_y_n)) %>%
  dplyr::select(Patient_ID, Condition2)

# A patient with two different labels would duplicate cells in the join below.
stopifnot(!anyDuplicated(md_for_remission$Patient_ID))

# Replace Condition2 in every dataset with "<PR label> <Time>"; cells without
# a usable label (no match or missing value, i.e. any "NA" in the string)
# get NA.
for (i in seq_along(dataset_list)) {
  dataset_list[[i]]$Condition2 <- NULL

  cell_meta <- dataset_list[[i]]@meta.data %>%
    left_join(md_for_remission, by = "Patient_ID") %>%
    mutate(Condition2 = paste(Condition2, Time)) %>%
    mutate(Condition2 = ifelse(grepl("NA", Condition2, fixed = TRUE),
                               NA_character_, Condition2))

  # The join drops the row names; restore the cell names before writing the
  # table back so the object never holds metadata without cell names.
  rownames(cell_meta) <- colnames(dataset_list[[i]])
  dataset_list[[i]]@meta.data <- cell_meta

  print(table(dataset_list[[i]]$Condition2))
}

In [None]:
# One row per patient with the ketoacidosis label: "Keto_1" when ph_man is
# below 7.3, otherwise "Keto_0". Taken from the Dia T0 cells of the full CD4
# object.
md_for_ketoacidosis <- cd4_l1_full_filt@meta.data %>%
  dplyr::filter(Condition %in% c("Dia T0")) %>%
  dplyr::distinct(Patient_ID, ph_man) %>%
  dplyr::mutate(Ketoacidosis = ifelse(ph_man < 7.3, "Keto_1", "Keto_0")) %>%
  dplyr::select(Patient_ID, Ketoacidosis)

# A patient with two different ph_man values would duplicate cells in the
# join below.
stopifnot(!anyDuplicated(md_for_ketoacidosis$Patient_ID))

# Add Ketoacidosis and Keto_Time ("<label> <Time>") to every dataset; cells
# without a usable label (any "NA" in the string) get NA in Keto_Time.
for (i in seq_along(dataset_list)) {
  cell_meta <- dataset_list[[i]]@meta.data
  # Drop a Ketoacidosis column left over from an earlier run; otherwise it
  # would collide with the joined column.
  cell_meta$Ketoacidosis <- NULL

  cell_meta <- cell_meta %>%
    left_join(md_for_ketoacidosis, by = "Patient_ID") %>%
    mutate(Keto_Time = paste(Ketoacidosis, Time)) %>%
    mutate(Keto_Time = ifelse(grepl("NA", Keto_Time, fixed = TRUE),
                              NA_character_, Keto_Time))

  # The join drops the row names; restore the cell names before writing back.
  rownames(cell_meta) <- colnames(dataset_list[[i]])
  dataset_list[[i]]@meta.data <- cell_meta

  print(table(dataset_list[[i]]$Ketoacidosis))
}

In [None]:
plan("multisession", workers = 4)

# Wilcoxon comparisons for ketoacidosis and partial remission. Each entry
# names the metadata column that defines the groups, the two groups to keep,
# and the suffix of the output file in ../tables/fold_change/.
wilcox_keto_pr_tests <- list(
  list(column = "Keto_Time", groups = c("Keto_1 T0", "Keto_0 T0"),
       suffix = "_ketoacidosis"),
  list(column = "Condition2", groups = c("PR_0 T0", "PR_0 T1"),
       suffix = "_PR_0_T0_vs_PR_0_T1"),
  list(column = "Condition2", groups = c("PR_1 T0", "PR_1 T1"),
       suffix = "_PR_1_T0_vs_PR_1_T1"),
  list(column = "Condition2", groups = c("PR_0 T0", "PR_1 T0"),
       suffix = "_PR_0_T0_vs_PR_1_T0"),
  list(column = "Condition2", groups = c("PR_0 T1", "PR_1 T1"),
       suffix = "_PR_0_T1_vs_PR_1_T1")
)

for (i in seq_along(dataset_list)) {
  full_object <- dataset_list[[i]]

  for (test in wilcox_keto_pr_tests) {
    # Keep only the cells belonging to one of the two compared groups.
    in_groups <- full_object@meta.data[[test$column]] %in% test$groups
    seurat_object <- subset(full_object, cells = colnames(full_object)[in_groups])
    DefaultAssay(seurat_object) <- "RNA"
    Idents(seurat_object) <- seurat_object@meta.data[[test$column]]

    # All filters are switched off so that every gene is reported.
    fc.seurat_object <- FindAllMarkers(
      seurat_object,
      test.use = "wilcox",
      logfc.threshold = -Inf, min.pct = -Inf, min.diff.pct = -Inf,
      only.pos = FALSE,
      min.cells.feature = 1, return.thresh = 1
    )

    write.csv(
      fc.seurat_object,
      paste0("../tables/fold_change/", names(dataset_list)[i], test$suffix, ".csv")
    )
  }
}

## FoldChanges DESeq

In [None]:
# Create the output folder for the DESeq2 fold-change tables. Stays silent
# when the folder already exists, so the cell can be re-run.
dir.create("../tables/fold_change_deseq/", showWarnings = FALSE, recursive = TRUE)

In [None]:
dataset_list

In [None]:
seu  <- dataset_list[[1]]

In [None]:
seu$Sample_ID  <- paste0("Id",seu$Sample_ID)

In [None]:
seu$Sample_ID  %>% table

In [None]:
# Aggregate the RNA counts of `seu` per Sample_ID and return the result as a
# Seurat object with one column per sample.
avgexp <- AggregateExpression(
  seu,
  assay = "RNA",
  slot = "counts",
  group.by = "Sample_ID",
  return.seurat = TRUE
)

In [None]:
colnames(avgexp)

In [None]:
# Sample-level metadata for the pseudobulk object built from `seu`.
# `seu` is dataset_list[[1]], so the metadata is read from the same dataset;
# indexing with `i` would pick whichever dataset the previous loop ended on.
patient_metadata2 <- dataset_list[[1]]@meta.data %>%
  dplyr::select(Sample_ID, Patient_ID, Condition, Disease, Time, Experiment_ID) %>%
  unique() %>%
  ungroup()

In [None]:
patient_metadata2$Sample_ID  <- paste0("Id",patient_metadata2$Sample_ID)

In [None]:
patient_metadata2$Sample_ID  %>% table

In [None]:
colnames(avgexp)

In [None]:
avgexp$Sample_ID

In [None]:
    # Attach the sample-level metadata to the pseudobulk object. The join key
    # is stated explicitly so that no other shared column can silently become
    # part of it. The join drops row names, so they are reset to the sample IDs.
    avgexp@meta.data  <- avgexp@meta.data   %>% left_join(patient_metadata2, by = "Sample_ID")
    rownames(avgexp@meta.data)  <- avgexp$Sample_ID
    

In [None]:
avgexp@meta.data

In [None]:
rownames(avgexp@meta.data)

In [None]:
colnames(avgexp)

In [None]:
# Keep the pseudobulk samples with Condition "Ctrl T0" or "Dia T0" that come
# from experiments Exp16, Exp18, Exp19 or Exp20, and make RNA the default assay.
seurat_object  <- subset(avgexp, Condition %in% c("Ctrl T0", "Dia T0") & 
                  Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20"))
DefaultAssay(seurat_object)  <- "RNA"

In [None]:
#plan("multisession", workers = 8)

# Pseudobulk DESeq2 comparisons between conditions.
# For every dataset the RNA counts are aggregated per sample, the sample-level
# metadata is attached, and each pair of conditions is tested on the samples
# from the listed experiments only. Results go to ../tables/fold_change_deseq/.
deseq_experiments <- c("Exp16", "Exp18", "Exp19", "Exp20")
deseq_condition_tests <- list(
  list(groups = c("Ctrl T0", "Dia T0"), suffix = "_Ctrl_vs_DiaT0"),
  list(groups = c("Ctrl T0", "Dia T1"), suffix = "_Ctrl_vs_DiaT1"),
  list(groups = c("Dia T0", "Dia T1"), suffix = "_DiaT0_vs_DiaT1")
)

for (i in seq_along(dataset_list)) {
  ## DESeq2
  seu <- dataset_list[[i]]
  patient_metadata2 <- seu@meta.data %>%
    dplyr::select(Sample_ID, Patient_ID, Condition, Disease, Time, Experiment_ID) %>%
    unique() %>%
    ungroup()

  # The same "Id" prefix is added on both sides so the IDs still match in the
  # join after aggregation.
  patient_metadata2$Sample_ID <- paste0("Id", patient_metadata2$Sample_ID)
  seu$Sample_ID <- paste0("Id", seu$Sample_ID)
  avgexp <- AggregateExpression(
    seu,
    return.seurat = TRUE, group.by = "Sample_ID",
    assay = "RNA", slot = "counts"
  )

  # The join drops row names; they are reset to the sample IDs before the
  # table is written back.
  pseudobulk_meta <- avgexp@meta.data %>%
    left_join(patient_metadata2, by = "Sample_ID")
  rownames(pseudobulk_meta) <- pseudobulk_meta$Sample_ID
  avgexp@meta.data <- pseudobulk_meta

  for (test in deseq_condition_tests) {
    keep <- pseudobulk_meta$Condition %in% test$groups &
      pseudobulk_meta$Experiment_ID %in% deseq_experiments
    seurat_object <- subset(avgexp, cells = colnames(avgexp)[keep])
    DefaultAssay(seurat_object) <- "RNA"
    Idents(seurat_object) <- seurat_object$Condition

    # All filters are switched off so that every gene is reported.
    fc.seurat_object <- FindAllMarkers(
      seurat_object,
      test.use = "DESeq2",
      logfc.threshold = -Inf, min.pct = -Inf, min.diff.pct = -Inf,
      only.pos = FALSE,
      min.cells.feature = 1, return.thresh = 1
    )

    write.csv(
      fc.seurat_object,
      paste0("../tables/fold_change_deseq/", names(dataset_list)[i], test$suffix, ".csv")
    )
  }
}

In [None]:
# One row per patient with the partial-remission label ("PR_<value>"),
# taken from the Dia T0 cells of the full CD4 object.
md_for_remission <- cd4_l1_full_filt@meta.data %>%
  dplyr::filter(Condition %in% c("Dia T0")) %>%
  dplyr::distinct(Patient_ID, part_remission_y_n) %>%
  dplyr::mutate(Condition2 = paste0("PR_", part_remission_y_n)) %>%
  dplyr::select(Patient_ID, Condition2)

# A patient with two different labels would duplicate cells in the join below.
stopifnot(!anyDuplicated(md_for_remission$Patient_ID))

# Replace Condition2 in every dataset with "<PR label> <Time>"; cells without
# a usable label (no match or missing value, i.e. any "NA" in the string)
# get NA.
for (i in seq_along(dataset_list)) {
  dataset_list[[i]]$Condition2 <- NULL

  cell_meta <- dataset_list[[i]]@meta.data %>%
    left_join(md_for_remission, by = "Patient_ID") %>%
    mutate(Condition2 = paste(Condition2, Time)) %>%
    mutate(Condition2 = ifelse(grepl("NA", Condition2, fixed = TRUE),
                               NA_character_, Condition2))

  # The join drops the row names; restore the cell names before writing the
  # table back so the object never holds metadata without cell names.
  rownames(cell_meta) <- colnames(dataset_list[[i]])
  dataset_list[[i]]@meta.data <- cell_meta

  print(table(dataset_list[[i]]$Condition2))
}

In [None]:
# One row per patient with the ketoacidosis label: "Keto_1" when ph_man is
# below 7.3, otherwise "Keto_0". Taken from the Dia T0 cells of the full CD4
# object.
md_for_ketoacidosis <- cd4_l1_full_filt@meta.data %>%
  dplyr::filter(Condition %in% c("Dia T0")) %>%
  dplyr::distinct(Patient_ID, ph_man) %>%
  dplyr::mutate(Ketoacidosis = ifelse(ph_man < 7.3, "Keto_1", "Keto_0")) %>%
  dplyr::select(Patient_ID, Ketoacidosis)

# A patient with two different ph_man values would duplicate cells in the
# join below.
stopifnot(!anyDuplicated(md_for_ketoacidosis$Patient_ID))

# Add Ketoacidosis and Keto_Time ("<label> <Time>") to every dataset; cells
# without a usable label (any "NA" in the string) get NA in Keto_Time.
for (i in seq_along(dataset_list)) {
  cell_meta <- dataset_list[[i]]@meta.data
  # Drop a Ketoacidosis column left over from an earlier run; otherwise it
  # would collide with the joined column.
  cell_meta$Ketoacidosis <- NULL

  cell_meta <- cell_meta %>%
    left_join(md_for_ketoacidosis, by = "Patient_ID") %>%
    mutate(Keto_Time = paste(Ketoacidosis, Time)) %>%
    mutate(Keto_Time = ifelse(grepl("NA", Keto_Time, fixed = TRUE),
                              NA_character_, Keto_Time))

  # The join drops the row names; restore the cell names before writing back.
  rownames(cell_meta) <- colnames(dataset_list[[i]])
  dataset_list[[i]]@meta.data <- cell_meta

  print(table(dataset_list[[i]]$Ketoacidosis))
}

In [None]:
plan("multisession", workers = 4)

# Pseudobulk DESeq2 comparisons for ketoacidosis and partial remission at T0.
# Each entry names the metadata column that defines the groups, the two groups
# to keep, and the suffix of the output file in ../tables/fold_change_deseq/.
deseq_experiments <- c("Exp16", "Exp18", "Exp19", "Exp20")
deseq_keto_pr_tests <- list(
  list(column = "Keto_Time", groups = c("Keto_1 T0", "Keto_0 T0"),
       suffix = "_ketoacidosis"),
  list(column = "Condition2", groups = c("PR_0 T0", "PR_1 T0"),
       suffix = "_PR_0_T0_vs_PR_1_T0")
)

for (i in seq_along(dataset_list)) {
  ## DESeq2
  seu <- dataset_list[[i]]
  patient_metadata2 <- seu@meta.data %>%
    dplyr::select(Sample_ID, Patient_ID, Condition, Disease, Time,
                  Experiment_ID, Keto_Time, Condition2) %>%
    unique() %>%
    ungroup()

  # The same "Id" prefix is added on both sides so the IDs still match in the
  # join after aggregation.
  patient_metadata2$Sample_ID <- paste0("Id", patient_metadata2$Sample_ID)
  seu$Sample_ID <- paste0("Id", seu$Sample_ID)
  avgexp <- AggregateExpression(
    seu,
    return.seurat = TRUE, group.by = "Sample_ID",
    assay = "RNA", slot = "counts"
  )

  # The join drops row names; they are reset to the sample IDs before the
  # table is written back.
  pseudobulk_meta <- avgexp@meta.data %>%
    left_join(patient_metadata2, by = "Sample_ID")
  rownames(pseudobulk_meta) <- pseudobulk_meta$Sample_ID
  avgexp@meta.data <- pseudobulk_meta

  for (test in deseq_keto_pr_tests) {
    keep <- pseudobulk_meta[[test$column]] %in% test$groups &
      pseudobulk_meta$Experiment_ID %in% deseq_experiments
    seurat_object <- subset(avgexp, cells = colnames(avgexp)[keep])
    DefaultAssay(seurat_object) <- "RNA"
    Idents(seurat_object) <- seurat_object@meta.data[[test$column]]

    # All filters are switched off so that every gene is reported.
    fc.seurat_object <- FindAllMarkers(
      seurat_object,
      test.use = "DESeq2",
      logfc.threshold = -Inf, min.pct = -Inf, min.diff.pct = -Inf,
      only.pos = FALSE,
      min.cells.feature = 1, return.thresh = 1
    )

    write.csv(
      fc.seurat_object,
      paste0("../tables/fold_change_deseq/", names(dataset_list)[i], test$suffix, ".csv")
    )
  }
}

In [None]:
list.files("../tables/fold_change_deseq/")

## All categories up and down

In [None]:
# Sorted, unique MSigDB category codes (gs_cat) available in msigdbr.
pathways <- msigdbr_collections() %>%
  dplyr::count(gs_cat) %>%
  pull(gs_cat)

In [None]:
# Gene-set-to-gene table for MSigDB category C7, reduced to the gene set name
# and the human gene symbol.
t2g <- msigdbr(species = "Homo sapiens", category = "C7") %>% 
          dplyr::select(gs_name, human_gene_symbol)

In [None]:
#' Run GSEA for one MSigDB category and label the hits by direction.
#'
#' @param pathway MSigDB category code passed to `msigdbr(category = )`.
#' @return A data frame of enriched gene sets with added `direction`
#'   ("Down" / "Up") and `gs_cat` columns, or NULL when neither result table
#'   has any rows.
get_pathway_df <- function(pathway) {
  t2g <- msigdbr(species = "Homo sapiens", category = pathway) %>%
    dplyr::select(gs_name, human_gene_symbol)

  # `global_down_values` is not an argument; it is looked up outside the
  # function.
  df_down <- as.data.frame(GSEA(global_down_values, TERM2GENE = t2g))

  # NOTE(review): `df_up` is never computed inside this function, so whatever
  # `df_up` exists outside it is reused for every category. The GSEA call for
  # the up-regulated ranking appears to be missing here - confirm and restore.
  results <- list(Down = df_down, Up = df_up)

  # Keep only the non-empty tables; "Down" rows come first when both exist.
  results <- results[vapply(results, nrow, integer(1)) > 0]
  if (length(results) == 0) {
    return(NULL)
  }

  for (direction in names(results)) {
    results[[direction]]$direction <- direction
  }

  df_all <- do.call(rbind, unname(results))
  df_all$gs_cat <- pathway
  return(df_all)
}

In [None]:
all_pathways  <- map(.x = pathways, .f = get_pathway_df)

In [None]:
all_pathways2  <- bind_rows(all_pathways)

In [None]:
all_pathways  <- map(.x = pathways, .f = get_pathway_df)

In [None]:
all_pathways2  <- bind_rows(all_pathways)

In [None]:
all_pathways2$core_enrichment   %>% nchar  %>% table

In [None]:
711/4

In [None]:
# Create the output folder for the pathway tables. Stays silent when the
# folder already exists, so the cell can be re-run.
dir.create("../tables/pathways/", showWarnings = FALSE, recursive = TRUE)

In [None]:
write.csv(all_pathways2, "../tables/pathways/gsea_global.csv")

In [None]:
# Split the "/"-separated core_enrichment string into one column per entry
# (v1, v2, ...).
# NOTE(review): the column count 138 is hard-coded; presumably it is the
# largest number of entries in any row of the current results - re-check
# after re-running the GSEA above.
enriched_genes  <- all_pathways2  %>% 
dplyr::select(core_enrichment)  %>% 
separate(core_enrichment, sep = "/",
        into = paste0("v",1:138))



In [None]:
enriched_genes

In [None]:
# Keep the first row per Entrez ID so that joins on `entrezid` cannot
# duplicate rows, then drop the grouping so later steps work on a plain table.
geneIDs2 <- geneIDs1 %>%
  group_by(entrezid) %>%
  slice_head(n = 1) %>%
  ungroup()

In [None]:
geneIDs2  <- geneIDs2  %>% filter(!is.na(entrezid))

In [None]:
df_enriched_genes  <- enriched_genes

In [None]:
# Replace the Entrez IDs in every v<number> column by the gene name found in
# geneIDs2 (NA where no match exists). The columns are taken from the table
# itself, so the loop no longer depends on a hard-coded column count.
for (id_col in grep("^v[0-9]+$", names(df_enriched_genes), value = TRUE)) {
  lookup <- data.frame(entrezid = as.integer(df_enriched_genes[[id_col]]))
  df_enriched_genes[[id_col]] <- left_join(lookup, geneIDs2, by = "entrezid")$gene
}

In [None]:
df_enriched_genes

In [None]:
df_enriched_genes$genes  <- ""

In [None]:
# Build one " / "-separated string of gene names per row in the `genes` column.
# Missing values are dropped before pasting. Removing the text " / NA"
# afterwards would also cut into real gene names that start with "NA"
# (e.g. NAMPT) and would keep a leading "NA".
gene_cols <- grep("^v[0-9]+$", names(df_enriched_genes), value = TRUE)
df_enriched_genes$genes <- vapply(
  seq_len(nrow(df_enriched_genes)),
  function(row) {
    symbols <- unlist(df_enriched_genes[row, gene_cols], use.names = FALSE)
    paste(symbols[!is.na(symbols)], collapse = " / ")
  },
  character(1)
)

In [None]:
df_enriched_genes

In [None]:
all_pathways2$genes  <- df_enriched_genes$genes

In [None]:
write.csv(all_pathways2, "../tables/pathways/gsea_global.csv")

Cluster profiler pathway heatmap

In [None]:
t2g <- msigdbr(species = "Homo sapiens", category = "C7") 

In [None]:
fgsea_sets<- t2g %>% split(x = .$gene_symbol, f = .$gs_name)

In [None]:
# Ranked gene list for the full CD4 dataset: avg_log2FC of the "Dia" cluster,
# sorted from highest to lowest and named by gene.
fc.df <- read_csv("../tables/fold_change/cd4_l1_full_filt.csv") %>%
  as.data.frame() %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::filter(cluster == "Dia") %>%
  dplyr::select(gene, avg_log2FC)

ranks <- deframe(fc.df)

In [None]:
# fgsea expects a named list of gene sets. `fgsea_sets` is that list (built
# from t2g by splitting gene symbols per gene set); t2g itself is a data frame.
edo <- fgsea(pathways = fgsea_sets, stats = ranks)

In [None]:
edo

In [None]:
# NOTE(review): `edo` is the table returned by fgsea() above, ranked by gene
# name. setReadable() is presumably meant for clusterProfiler/DOSE result
# objects keyed by Entrez ID - confirm this call works on an fgsea result.
edox <- setReadable(edo, 'org.Hs.eg.db', 'ENTREZID')

In [None]:
# NOTE(review): `geneList` is not defined in the visible part of this
# notebook - confirm where it comes from before running this cell.
p2 <- heatplot(edox, foldChange=geneList, showCategory=5)

# GSEA all pathways

In [None]:
m_df<- msigdbr(species = "Homo sapiens", category = "C7")


In [None]:
fgsea_sets<- m_df %>% split(x = .$gene_symbol, f = .$gs_name)

In [None]:
# Full paths of all Wilcoxon fold-change tables and the matching file names.
paths_fc <- list.files("../tables/fold_change", full.names = TRUE)
dataset_names <- basename(paths_fc)


In [None]:
paths_fc

In [None]:
# Keep only the tables of the Ctrl-vs-Dia and Dia T0-vs-Dia T1 comparisons.
paths_fc <- paths_fc[grepl("Ctrl", paths_fc) | grepl("DiaT0_vs_DiaT1", paths_fc)]

In [None]:
# Apply the same filter to the file names so they stay aligned with paths_fc.
dataset_names <- dataset_names[
  grepl("Ctrl", dataset_names) | grepl("DiaT0_vs_DiaT1", dataset_names)
]

In [None]:
dataset_names

In [None]:
paths_fc

In [None]:
fc.df  <- read_csv(paths_fc[1])  %>% as.data.frame()

In [None]:
fc.df %>%  arrange(desc(avg_log2FC)) %>% 
dplyr::filter(grepl(x = cluster, pattern = "Dia")) 

## C7 pathways

In [None]:
#' Run fgsea on one fold-change table.
#'
#' Genes are ranked by the avg_log2FC reported for the diabetic group: the
#' "Dia..." cluster in Ctrl-vs-Dia tables, the "Dia T0" cluster otherwise.
#'
#' @param j Index of the table to process.
#' @param paths Paths of the fold-change tables; defaults to `paths_fc`.
#' @param dataset_labels File names matching `paths`; defaults to
#'   `dataset_names`.
#' @param gene_sets Named list of gene sets; defaults to `fgsea_sets`.
#' @return The fgsea result table with an added `dataset` column (file name
#'   without the ".csv" extension).
gsea_all_pathways <- function(j,
                              paths = paths_fc,
                              dataset_labels = dataset_names,
                              gene_sets = fgsea_sets) {
  print(j)
  fc.df <- read_csv(paths[j]) %>% as.data.frame()

  # Only the cluster pattern differs between the two kinds of table.
  cluster_pattern <- if (grepl("Ctrl", paths[j], fixed = TRUE)) "Dia" else "Dia T0"

  ranks <- fc.df %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster_pattern, cluster, fixed = TRUE)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    deframe()

  fgseaRes <- fgsea(pathways = gene_sets, stats = ranks)

  # Strip only a literal trailing ".csv"; an unescaped "." would match any
  # character.
  fgseaRes$dataset <- sub("\\.csv$", "", dataset_labels[j])
  return(fgseaRes)
}

In [None]:
gsea  <- map(.x = c(1:length(paths_fc)), .f = gsea_all_pathways)

In [None]:
gseas  <- bind_rows(gsea)

In [None]:
gseas  %>% arrange(padj)

In [None]:
# Create the output folder for the GSEA tables. Stays silent when the folder
# already exists, so the cell can be re-run.
dir.create("../tables/gsea", showWarnings = FALSE, recursive = TRUE)

In [None]:
gseas_df  <- gseas  %>% as.data.frame

In [None]:
gseas_df  <- gseas_df  %>% mutate(
    leadingEdge2 = paste(leadingEdge))

In [None]:
gseas_df$leadingEdge  <- NULL

In [None]:
write.csv(gseas_df, "../tables/gsea/gsea_fold_changes_for_heatmap.csv")

In [None]:
gseas_df

## DESEQ

In [None]:
m_df<- msigdbr(species = "Homo sapiens", category = "C7")


In [None]:
fgsea_sets<- m_df %>% split(x = .$gene_symbol, f = .$gs_name)

In [None]:
# Full paths of all DESeq2 fold-change tables and the matching file names.
paths_fc <- list.files("../tables/fold_change_deseq/", full.names = TRUE)
dataset_names <- basename(paths_fc)


In [None]:
paths_fc

In [None]:
# Keep only the tables of the Ctrl-vs-Dia and Dia T0-vs-Dia T1 comparisons.
paths_fc <- paths_fc[grepl("Ctrl", paths_fc) | grepl("DiaT0_vs_DiaT1", paths_fc)]

In [None]:
# Apply the same filter to the file names so they stay aligned with paths_fc.
dataset_names <- dataset_names[
  grepl("Ctrl", dataset_names) | grepl("DiaT0_vs_DiaT1", dataset_names)
]

In [None]:
dataset_names

In [None]:
paths_fc

In [None]:
fc.df  <- read_csv(paths_fc[1])  %>% as.data.frame()

In [None]:
fc.df %>%  arrange(desc(avg_log2FC)) %>% 
dplyr::filter(grepl(x = cluster, pattern = "Dia")) 

In [None]:
fc.df %>%  arrange(p_val_adj) 


## C7 pathways

In [None]:
#' Run fgsea on one fold-change table.
#'
#' Genes are ranked by the avg_log2FC reported for the diabetic group: the
#' "Dia..." cluster in Ctrl-vs-Dia tables, the "Dia T0" cluster otherwise.
#'
#' @param j Index of the table to process.
#' @param paths Paths of the fold-change tables; defaults to `paths_fc`.
#' @param dataset_labels File names matching `paths`; defaults to
#'   `dataset_names`.
#' @param gene_sets Named list of gene sets; defaults to `fgsea_sets`.
#' @return The fgsea result table with an added `dataset` column (file name
#'   without the ".csv" extension).
gsea_all_pathways <- function(j,
                              paths = paths_fc,
                              dataset_labels = dataset_names,
                              gene_sets = fgsea_sets) {
  print(j)
  fc.df <- read_csv(paths[j]) %>% as.data.frame()

  # Only the cluster pattern differs between the two kinds of table.
  cluster_pattern <- if (grepl("Ctrl", paths[j], fixed = TRUE)) "Dia" else "Dia T0"

  ranks <- fc.df %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster_pattern, cluster, fixed = TRUE)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    deframe()

  fgseaRes <- fgsea(pathways = gene_sets, stats = ranks)

  # Strip only a literal trailing ".csv"; an unescaped "." would match any
  # character.
  fgseaRes$dataset <- sub("\\.csv$", "", dataset_labels[j])
  return(fgseaRes)
}

In [None]:
gsea  <- map(.x = c(1:length(paths_fc)), .f = gsea_all_pathways)

In [None]:
gseas  <- bind_rows(gsea)

In [None]:
gseas  %>% arrange(padj)

In [None]:
# Make sure the output folder for the GSEA tables exists. Stays silent when
# it is already there.
dir.create("../tables/gsea", showWarnings = FALSE, recursive = TRUE)

In [None]:
gseas_df  <- gseas  %>% as.data.frame

In [None]:
gseas_df  <- gseas_df  %>% mutate(
    leadingEdge2 = paste(leadingEdge))

In [None]:
gseas_df$leadingEdge  <- NULL

In [None]:
# NOTE(review): this is the same output path the GSEA table based on the
# ../tables/fold_change (Wilcoxon) files is written to earlier in this
# notebook, so this DESeq2-based table replaces it on disk - confirm that
# this is intended.
write.csv(gseas_df, "../tables/gsea/gsea_fold_changes_for_heatmap.csv")

## Hallmark pathways

In [None]:
m_df<- msigdbr(species = "Homo sapiens", category = "H")


fgsea_sets<- m_df %>% split(x = .$gene_symbol, f = .$gs_name)

In [None]:
names(fgsea_sets)

In [None]:
#' Run fgsea on one fold-change table.
#'
#' Genes are ranked by the avg_log2FC reported for the diabetic group: the
#' "Dia..." cluster in Ctrl-vs-Dia tables, the "Dia T0" cluster otherwise.
#'
#' @param j Index of the table to process.
#' @param paths Paths of the fold-change tables; defaults to `paths_fc`.
#' @param dataset_labels File names matching `paths`; defaults to
#'   `dataset_names`.
#' @param gene_sets Named list of gene sets; defaults to `fgsea_sets`.
#' @return The fgsea result table with an added `dataset` column (file name
#'   without the ".csv" extension).
gsea_all_pathways <- function(j,
                              paths = paths_fc,
                              dataset_labels = dataset_names,
                              gene_sets = fgsea_sets) {
  print(j)
  fc.df <- read_csv(paths[j]) %>% as.data.frame()

  # Only the cluster pattern differs between the two kinds of table.
  cluster_pattern <- if (grepl("Ctrl", paths[j], fixed = TRUE)) "Dia" else "Dia T0"

  ranks <- fc.df %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster_pattern, cluster, fixed = TRUE)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    deframe()

  fgseaRes <- fgsea(pathways = gene_sets, stats = ranks)

  # Strip only a literal trailing ".csv"; an unescaped "." would match any
  # character.
  fgseaRes$dataset <- sub("\\.csv$", "", dataset_labels[j])
  return(fgseaRes)
}

In [None]:
gsea  <- map(.x = c(1:length(paths_fc)), .f = gsea_all_pathways)

In [None]:
gseas  <- bind_rows(gsea)

In [None]:
gseas  %>% arrange(padj)

In [None]:
gseas_df  <- gseas  %>% as.data.frame

In [None]:
gseas_df  <- gseas_df  %>% mutate(
    leadingEdge2 = paste(leadingEdge))

In [None]:
gseas_df$leadingEdge  <- NULL

In [None]:
write.csv(gseas_df, "../tables/gsea/gsea_fold_changes_for_heatmap_hallmark.csv")

In [None]:
gseas_df

## Heatmap immune and hallmark

In [None]:
gseas_hallamrk  <- read_csv("../tables/gsea/gsea_fold_changes_for_heatmap_hallmark.csv")

In [None]:
gseas_immune  <- read_csv("../tables/gsea/gsea_fold_changes_for_heatmap.csv")

In [None]:
gseas_both  <- rbind(gseas_immune, gseas_hallamrk)

In [None]:
## Keep pathways with padj < 1e-5 in more than 5 population-test_type combinations

In [None]:
gseas_selected  <- gseas_both  %>% dplyr::filter(padj < 0.00001)  %>% group_by(pathway)  %>% tally

In [None]:
gseas_selected

In [None]:
gseas_selected  <- gseas_selected  %>% dplyr::filter(n>5)  %>% pull(pathway)  %>% unique

In [None]:
gseas_selected

In [None]:
gseas_selected  %>% length

In [None]:
# NES of the selected pathways, one row per dataset and one column per
# pathway. The "_" in front of the test-type suffix is turned into "." so the
# dataset name can be split into population and test type.
gseas_selected2 <- gseas_both %>%
  dplyr::filter(pathway %in% gseas_selected) %>%
  dplyr::select(NES, dataset, pathway) %>%
  mutate(dataset = Reduce(
    function(label, marker) gsub(marker, sub("^_", ".", marker), label),
    c("_Ctrl_vs_DiaT0", "_Ctrl_vs_DiaT1", "_DiaT0_vs_DiaT1"),
    dataset
  )) %>%
  separate(dataset, into = c("population", "test_type"), sep = "\\.", remove = FALSE) %>%
  pivot_wider(names_from = "pathway", values_from = "NES", values_fill = 0)

In [None]:
gseas_mtx  <- gseas_selected2  %>% arrange(test_type, population)  %>% 
dplyr::select(-dataset, -population, -test_type)  %>% 
as.matrix()

In [None]:
rownames(gseas_mtx)  <- gseas_selected2  %>% arrange(test_type, population)  %>% pull(dataset)

In [None]:
library(pheatmap)

In [None]:
colnames(gseas_mtx)

In [None]:
# Indices of the rows that are neither a subcluster dataset nor a full L1
# object. The negation has to be applied to the logical mask: negating the
# output of which() turns the indices themselves into FALSE values.
which(!(grepl("subcluster", rownames(gseas_mtx)) |
          grepl("_l1_", rownames(gseas_mtx))))

In [None]:
options(repr.plot.width = 30, repr.plot.height = 18)

# Heatmap of all rows except the subcluster and L1 datasets; rows keep their
# order, columns are clustered.
pheatmap(
  gseas_mtx[
    !grepl("subcluster", rownames(gseas_mtx)) & !grepl("_l1_", rownames(gseas_mtx)),
  ],
  main = "", scale = "none", cluster_cols = TRUE, cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white", width = 9, height = 3.3,
  fontsize = 9
)

In [None]:
options(repr.plot.width = 30, repr.plot.height = 18)
pheatmap(gseas_mtx, main = "", scale = "none", cluster_cols = T, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 40, height = 30,
                  fontsize = 9, filename = "../figures/heatmaps/gsea_heatmap.pdf")

In [None]:
# Heatmap of the Ctrl-vs-Dia T0 rows only (subcluster and L1 datasets left
# out), with both rows and columns clustered.
hm_ctrl_diat0 <- pheatmap(
  gseas_mtx[
    !grepl("subcluster", rownames(gseas_mtx)) &
      !grepl("_l1_", rownames(gseas_mtx)) &
      grepl("Ctrl_vs_DiaT0", rownames(gseas_mtx)),
  ],
  main = "", scale = "none", cluster_cols = TRUE, cluster_rows = TRUE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white", width = 9, height = 3.3,
  fontsize = 9
)

In [None]:
rownames(gseas_mtx[which((grepl(rownames(gseas_mtx), pattern = "subcluster")==F) &
                         (grepl(rownames(gseas_mtx), pattern = "_l1_")==F) &
                        grepl(rownames(gseas_mtx), pattern = "Ctrl_vs_DiaT0")),])[
    c(14, 15, 8, 6, 17, 7, 13, 3, 9, 4, 1, 10, 2, 5, 16, 11, 12)]

In [None]:
rownames(gseas_mtx[which((grepl(rownames(gseas_mtx), pattern = "subcluster")==F) &
                         (grepl(rownames(gseas_mtx), pattern = "_l1_")==F) &
                        grepl(rownames(gseas_mtx), pattern = "Ctrl_vs_DiaT1")),])[
    c(14, 15, 8, 6, 17, 7, 13, 3, 9, 4, 1, 10, 2, 5, 16, 11, 12)]

In [None]:
options(repr.plot.width = 30, repr.plot.height = 18)

# Heatmap with the rows of the three test types stacked in the order
# Ctrl_vs_DiaT0, Ctrl_vs_DiaT1, DiaT0_vs_DiaT1. Within each test type the
# subcluster and L1 datasets are left out and the same fixed row order is
# applied.
pheatmap(
  gseas_mtx[unlist(lapply(
    c("Ctrl_vs_DiaT0", "Ctrl_vs_DiaT1", "DiaT0_vs_DiaT1"),
    function(test_type) {
      all_rows <- rownames(gseas_mtx)
      kept_rows <- all_rows[
        !grepl("subcluster", all_rows) &
          !grepl("_l1_", all_rows) &
          grepl(test_type, all_rows)
      ]
      kept_rows[c(14, 15, 8, 6, 17, 7, 13, 3, 9, 4, 1, 10, 2, 5, 16, 11, 12)]
    }
  )), ],
  main = "", scale = "none", cluster_cols = TRUE, cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white", width = 9, height = 3.3,
  fontsize = 9
)

In [None]:
options(repr.plot.width = 30, repr.plot.height = 18)

# Same ordered heatmap as above in the notebook, written to a PDF file.
# Rows of the three test types are stacked in the order Ctrl_vs_DiaT0,
# Ctrl_vs_DiaT1, DiaT0_vs_DiaT1; the subcluster and L1 datasets are left out
# and the same fixed row order is applied within each test type.
pheatmap(
  gseas_mtx[unlist(lapply(
    c("Ctrl_vs_DiaT0", "Ctrl_vs_DiaT1", "DiaT0_vs_DiaT1"),
    function(test_type) {
      all_rows <- rownames(gseas_mtx)
      kept_rows <- all_rows[
        !grepl("subcluster", all_rows) &
          !grepl("_l1_", all_rows) &
          grepl(test_type, all_rows)
      ]
      kept_rows[c(14, 15, 8, 6, 17, 7, 13, 3, 9, 4, 1, 10, 2, 5, 16, 11, 12)]
    }
  )), ],
  main = "", scale = "none", cluster_cols = TRUE, cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white", width = 30, height = 15,
  fontsize = 9, filename = "../figures/heatmaps/gsea_heatmap_ordered_with_hallmark.pdf"
)

## Heatmap

In [None]:
gseas_selected  <- gseas_both  %>% dplyr::filter(padj < 0.00000001)  %>% pull(pathway)  %>% unique

In [None]:
gseas_selected  %>% length

In [None]:
# NES of the selected pathways, one row per dataset and one column per
# pathway. The "_" in front of the test-type suffix is turned into "." so the
# dataset name can be split into population and test type.
# NOTE(review): the pathways in gseas_selected are picked from gseas_both,
# but the NES values here come from `gseas`, which holds the result of the
# most recent fgsea run - confirm that `gseas` is the intended source.
gseas_selected2 <- gseas %>%
  dplyr::filter(pathway %in% gseas_selected) %>%
  dplyr::select(NES, dataset, pathway) %>%
  mutate(dataset = Reduce(
    function(label, marker) gsub(marker, sub("^_", ".", marker), label),
    c("_Ctrl_vs_DiaT0", "_Ctrl_vs_DiaT1", "_DiaT0_vs_DiaT1"),
    dataset
  )) %>%
  separate(dataset, into = c("population", "test_type"), sep = "\\.", remove = FALSE) %>%
  pivot_wider(names_from = "pathway", values_from = "NES", values_fill = 0)

In [None]:
gseas_mtx  <- gseas_selected2  %>% arrange(test_type, population)  %>% 
dplyr::select(-dataset, -population, -test_type)  %>% 
as.matrix()

In [None]:
rownames(gseas_mtx)  <- gseas_selected2  %>% arrange(test_type, population)  %>% pull(dataset)

In [None]:
library(pheatmap)

In [None]:
colnames(gseas_mtx)

In [None]:
options(repr.plot.width = 30, repr.plot.height = 18)
pheatmap(gseas_mtx, main = "", scale = "none", cluster_cols = T, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 9)

In [None]:
options(repr.plot.width = 30, repr.plot.height = 18)
pheatmap(gseas_mtx, main = "", scale = "none", cluster_cols = T, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 40, height = 30,
                  fontsize = 9, filename = "../figures/heatmaps/gsea_heatmap.pdf")

In [None]:
write.csv(colnames(gseas_mtx), "../tables/gsea/gseas_top_in_ehatmap_picture.csv")

In [None]:
annotation_row = annot1

In [None]:
# Unique pathways that are among the five highest-NES pathways of any
# dataset; used as the factor level order in the dot plot.
levels <- gseas %>%
  group_by(dataset) %>%
  slice_max(order_by = NES, n = 5) %>%
  pull(pathway) %>%
  unique()

In [None]:

# Five highest-NES pathways per dataset, one facet per dataset.
options(repr.plot.width = 20, repr.plot.height = 25)
gseas %>%
  group_by(dataset) %>%
  slice_max(order_by = NES, n = 5) %>%
  ggplot(aes(x = factor(pathway, levels = levels), y = NES)) +
  geom_point(aes(size = size, color = dataset, alpha = -padj)) +
  coord_flip() +
  facet_wrap(~dataset, ncol = 1, drop = TRUE, scales = "free")

In [None]:
gseas

In [None]:
# Pathway names of the cd4_l1_full_filt dataset, in table order.
Labels <- pull(dplyr::filter(gseas, dataset == "cd4_l1_full_filt"), pathway)

In [None]:
Labels = ifelse(grepl(pattern = "TREG", Labels), Labels, NA_character_)

In [None]:
Labels

In [None]:
library(ggrepel)

ggrepel::geom_text_repel

In [None]:
# NES against padj for cd4_l1_full_filt; pathways whose name contains "TREG" are
# highlighted by colour. `Labels` must come from the same filtered rows so that
# the text labels line up with the points.
options(repr.plot.width = 20, repr.plot.height = 9)

ggplot(
  dplyr::filter(gseas, dataset == "cd4_l1_full_filt"),
  aes(x = NES, y = padj)
) +
  geom_point(aes(
    size = size,
    color = grepl(pathway, pattern = "TREG"),
    alpha = -padj
  )) +
  geom_text_repel(aes(label = Labels))

## GSEA heatmap v07

In [None]:
# Rebuild the NES matrix (rows ordered by test type, then population) without
# the identifier columns.
gseas_mtx <- as.matrix(
  dplyr::select(
    arrange(gseas_selected2, test_type, population),
    -dataset, -population, -test_type
  )
)

In [None]:
which(colnames(gseas_selected2) %in% c("GOLDRATH_EFF_VS_MEMORY_CD8_TCELL_UP",
"GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN",
                                       "GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN",
                                       "GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP"                                       ))

In [None]:
gseas_selected2

In [None]:
# NES matrix restricted to four naive/effector/memory signatures, rows ordered
# by test type and then population.
# The columns are selected by name: the previous positional indices
# (21, 22, 39, 40) silently point at other pathways as soon as the set of
# selected pathways changes. all_of() fails loudly if a signature is missing.
# NOTE(review): the four names are the ones looked up with which() in the cell
# above and are presumed to be the columns at those positions — confirm. The
# column order now follows this list.
gseas_selected3 <- gseas_selected2 %>%
  arrange(test_type, population) %>%
  dplyr::select(dplyr::all_of(c(
    "GOLDRATH_EFF_VS_MEMORY_CD8_TCELL_UP",
    "GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN",
    "GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN",
    "GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP"
  ))) %>%
  as.matrix()


In [None]:
gseas_selected3

In [None]:
# Heatmap of the reduced matrix with the blue-grey-red palette.
pheatmap(
  gseas_selected3,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50)
)

# GSEA in KetoAcidosis and Remission

In [None]:
library(msigdbr)

In [None]:
m_df<- msigdbr(species = "Homo sapiens", category = "C7")


In [None]:
# Named list of gene-symbol vectors, one element per gene set name.
fgsea_sets <- split(x = m_df$gene_symbol, f = m_df$gs_name)

In [None]:
paths_fc  <- list.files("../tables/fold_change", full.names = T)
dataset_names  <- list.files("../tables/fold_change", full.names = F)


In [None]:
paths_fc

In [None]:
# Keep only the ketoacidosis and PR_0_T0-vs-PR_1_T0 fold-change tables.
paths_fc <- paths_fc[grepl("ketoacidosis|PR_0_T0_vs_PR_1_T0", paths_fc)]

In [None]:
# Same selection for the bare file names so both vectors stay aligned.
dataset_names <- dataset_names[grepl("ketoacidosis|PR_0_T0_vs_PR_1_T0", dataset_names)]

In [None]:
dataset_names

In [None]:
paths_fc

In [None]:
fc.df  <- read_csv(paths_fc[2])  %>% as.data.frame()

In [None]:
fc.df

## C7 pathways

In [None]:
#' Run fgsea on one fold-change table.
#'
#' Reads `paths_fc[j]`, keeps the rows whose `cluster` matches the reference
#' group ("PR_0 T0" when the file path contains "PR_0_T0", otherwise
#' "Keto_1 T0"), builds a named `avg_log2FC` vector sorted in decreasing order
#' and tests it against the global `fgsea_sets`.
#'
#' @param j Index into the global vectors `paths_fc` and `dataset_names`.
#' @return The `fgsea()` result with an added `dataset` column holding the file
#'   name without its `.csv` extension.
gsea_all_pathways <- function(j) {
  print(j)
  # The comparison type is encoded in the file path and decides which cluster
  # supplies the ranking.
  cluster_pattern <- if (grepl("PR_0_T0", paths_fc[j])) "PR_0 T0" else "Keto_1 T0"
  ranks <- read_csv(paths_fc[j]) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster, pattern = cluster_pattern)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    deframe()
  fgseaRes <- fgsea(pathways = fgsea_sets, stats = ranks)
  # Remove only a literal trailing ".csv"; the former unanchored pattern ".csv"
  # treated "." as a wildcard and could strip text inside the name.
  fgseaRes$dataset <- sub("\\.csv$", "", dataset_names[j])
  fgseaRes
}

In [None]:
# seq_along() yields an empty sequence when no file matched, whereas
# 1:length() would produce c(1, 0) and index non-existent paths.
gsea <- map(.x = seq_along(paths_fc), .f = gsea_all_pathways)

In [None]:
gseas  <- bind_rows(gsea)

In [None]:
gseas  %>% arrange(padj)

In [None]:
# Create the output directory (and missing parents) without warning when it
# already exists.
dir.create("../tables/gsea", showWarnings = FALSE, recursive = TRUE)

In [None]:
gseas_df  <- gseas  %>% as.data.frame

In [None]:
# Character copy of `leadingEdge`; the original column is removed in the next
# cell before the table is written with write.csv().
gseas_df <- mutate(gseas_df, leadingEdge2 = paste(leadingEdge))

In [None]:
gseas_df$leadingEdge  <- NULL

In [None]:
write.csv(gseas_df, "../tables/gsea/gsea_fold_changes_for_heatmap_ketoPR.csv")

In [None]:
gseas_df

## Hallmark pathways

In [None]:
# Hallmark collection ("H") as a named list of gene-symbol vectors.
# This replaces the `m_df` and `fgsea_sets` objects created for C7 above.
m_df <- msigdbr(species = "Homo sapiens", category = "H")

fgsea_sets <- split(x = m_df$gene_symbol, f = m_df$gs_name)

In [None]:
names(fgsea_sets)

In [None]:
#' Run fgsea on one fold-change table.
#'
#' Reads `paths_fc[j]`, keeps the rows whose `cluster` matches the reference
#' group ("PR_0 T0" when the file path contains "PR_0_T0", otherwise
#' "Keto_1 T0"), builds a named `avg_log2FC` vector sorted in decreasing order
#' and tests it against the global `fgsea_sets`.
#'
#' @param j Index into the global vectors `paths_fc` and `dataset_names`.
#' @return The `fgsea()` result with an added `dataset` column holding the file
#'   name without its `.csv` extension.
gsea_all_pathways <- function(j) {
  print(j)
  # The comparison type is encoded in the file path and decides which cluster
  # supplies the ranking.
  cluster_pattern <- if (grepl("PR_0_T0", paths_fc[j])) "PR_0 T0" else "Keto_1 T0"
  ranks <- read_csv(paths_fc[j]) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC)) %>%
    dplyr::filter(grepl(cluster, pattern = cluster_pattern)) %>%
    dplyr::select(gene, avg_log2FC) %>%
    deframe()
  fgseaRes <- fgsea(pathways = fgsea_sets, stats = ranks)
  # Remove only a literal trailing ".csv"; the former unanchored pattern ".csv"
  # treated "." as a wildcard and could strip text inside the name.
  fgseaRes$dataset <- sub("\\.csv$", "", dataset_names[j])
  fgseaRes
}

In [None]:
# seq_along() yields an empty sequence when no file matched, whereas
# 1:length() would produce c(1, 0) and index non-existent paths.
gsea <- map(.x = seq_along(paths_fc), .f = gsea_all_pathways)

In [None]:
gseas  <- bind_rows(gsea)

In [None]:
gseas  %>% arrange(padj)

In [None]:
gseas_df  <- gseas  %>% as.data.frame

In [None]:
# Character copy of `leadingEdge`; the original column is removed in the next
# cell before the table is written with write.csv().
gseas_df <- mutate(gseas_df, leadingEdge2 = paste(leadingEdge))

In [None]:
gseas_df$leadingEdge  <- NULL

In [None]:
write.csv(gseas_df, "../tables/gsea/gsea_fold_changes_for_heatmap_hallmark_ketoPR.csv")

In [None]:
gseas_df

## Heatmap of selected GSEAS V07

In [None]:
paths_fc  <- list.files("../tables/gsea/", full.names = T)


In [None]:
# Keep only the GSEA result tables written for the heatmap.
paths_fc <- grep("gsea_fold_changes_for_heatmap", paths_fc, value = TRUE)

In [None]:
paths_fc

In [None]:
test  <- fread(paths_fc[3])

In [None]:
test

In [None]:
gseas  <- map(paths_fc, .f = fread)

In [None]:
gseas  <- bind_rows(gseas)

In [None]:
gseas$population  <- substr(gseas$dataset, 1,11)

In [None]:
table(gseas$population)

In [None]:
gseas  <- gseas  %>% mutate(population2 = ifelse(grepl(population, pattern = "_nk_"), substr(population,1,9), population))

In [None]:
gseas$population2  %>% table

In [None]:
# `test_type` is the last 13 characters of `dataset`; `n1` (string length) and
# `n2` (start position) are kept as helper columns.
gseas <- gseas %>%
  mutate(
    n1 = as.numeric(nchar(dataset)),
    n2 = n1 - 12,
    test_type = substr(dataset, n2, n1)
  )


In [None]:
gseas$test_type  %>% table

In [None]:
gseas

In [None]:
gs  <- gseas  %>% group_by(dataset)  %>% slice_max(order_by = -log10(padj), n = 10)

In [None]:
write.csv(gs, "../tables/gsea/v07heatmap_top_pathways.csv")

In [None]:
# Enrichment curves for selected signatures. Each plot is titled with the name
# of the gene set it draws; the first plot previously carried the title of a
# different set (GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN).
# NOTE(review): consecutive plots are joined with `+`, which presumably relies
# on a plot-composition package such as patchwork being attached — confirm.
# NOTE(review): `ranks` is only assigned inside gsea_all_pathways() in the
# cells visible above, and `fgsea_sets` was last set to the Hallmark collection
# there; confirm which objects this cell is meant to run against.
plotEnrichment(fgsea_sets[["GOLDRATH_EFF_VS_MEMORY_CD8_TCELL_UP"]],
               ranks) + labs(title="GOLDRATH_EFF_VS_MEMORY_CD8_TCELL_UP") +

plotEnrichment(fgsea_sets[["GOLDRATH_NAIVE_VS_MEMORY_CD8_TCELL_DN"]],
               ranks) + labs(title="GOLDRATH_NAIVE_VS_MEMORY_CD8_TCELL_DN") +

plotEnrichment(fgsea_sets[["GSE11057_NAIVE_VS_CENT_MEMORY_CD4_TCELL_DN"]],
               ranks) + labs(title="GSE11057_NAIVE_VS_CENT_MEMORY_CD4_TCELL_DN") 




plotEnrichment(fgsea_sets[["GOLDRATH_EFF_VS_MEMORY_CD8_TCELL_DN"]],
               ranks) + labs(title="GOLDRATH_EFF_VS_MEMORY_CD8_TCELL_DN") + 

plotEnrichment(fgsea_sets[["GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_UP"]],
               ranks) + labs(title="GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_UP") +

plotEnrichment(fgsea_sets[["GOLDRATH_NAIVE_VS_MEMORY_CD8_TCELL_UP"]],
               ranks) + labs(title="GOLDRATH_NAIVE_VS_MEMORY_CD8_TCELL_UP") +

plotEnrichment(fgsea_sets[["GSE11057_NAIVE_VS_CENT_MEMORY_CD4_TCELL_UP"]],
               ranks) + labs(title="GSE11057_NAIVE_VS_CENT_MEMORY_CD4_TCELL_UP") 



plotEnrichment(fgsea_sets[["GSE11057_NAIVE_VS_CENT_MEMORY_CD4_TCELL_UP"]],
               ranks) + labs(title="GSE11057_NAIVE_VS_CENT_MEMORY_CD4_TCELL_UP") +

plotEnrichment(fgsea_sets[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN"]],
               ranks) + labs(title="GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN") +

plotEnrichment(fgsea_sets[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP"]],
               ranks) + labs(title="GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP") +

plotEnrichment(fgsea_sets[["GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_DN"]],
               ranks) + labs(title="GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_DN") 


options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets[["GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_DN"]],
               ranks) + labs(title="GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_DN") 

plotEnrichment(fgsea_sets[["GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP"]],
               ranks) + labs(title="GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP") 

plotEnrichment(fgsea_sets[["GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP"]],
               ranks) + labs(title="GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP") +

plotEnrichment(fgsea_sets[["GSE13738_RESTING_VS_BYSTANDER_ACTIVATED_CD4_TCELL_DN"]],
               ranks) + labs(title="GSE13738_RESTING_VS_BYSTANDER_ACTIVATED_CD4_TCELL_DN") +

plotEnrichment(fgsea_sets[["GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_DN"]],
               ranks) + labs(title="GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_DN") +

plotEnrichment(fgsea_sets[["GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP"]],
               ranks) + labs(title="GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP") 


In [None]:
# Pathways shown in the summary dot plot: four Hallmark sets followed by six
# C7 immunologic signatures.
selected_pathways <- c(
  "HALLMARK_TNFA_SIGNALING_VIA_NFKB",
  "HALLMARK_OXIDATIVE_PHOSPHORYLATION",
  "HALLMARK_INTERFERON_ALPHA_RESPONSE",
  "HALLMARK_INTERFERON_GAMMA_RESPONSE",
  "GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP",
  "GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP",
  "GSE11057_NAIVE_VS_MEMORY_CD4_TCELL_UP",
  "GOLDRATH_NAIVE_VS_MEMORY_CD8_TCELL_DN",
  "GOLDRATH_EFF_VS_MEMORY_CD8_TCELL_UP",
  "GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN"
)

In [None]:
# For every observed `population2` label: is it part of the plotting order?
# Labels are the first 11 characters of `dataset` (9 for the "_nk_" entries),
# so the Th1/Th17 label is "cd4_l3_th1t"; the former entry "cd4_l3_th1t7" has
# 12 characters and could never match.
names(table(gseas$population2)) %in% c(
  "cd8_l3_prol",
  "cd8_l3_tcm_",
  "cd4_l3_th1t",
  "cd4_l3_temr",
  "cd8_l3_temr",
  "cd4_l3_tfh_",
  "cd8_l3_naiv",
  "cd4_l3_naiv",
  "cd4_l3_th2_",
  "cd4_l3_nfkb",
  "cd4_l2_unc_",
  "cd4_l3_treg",
  "cd4_l3_isag",
  "cd4_l3_prol",
  "cd8_l3_tem_",
  "cd8_l2_nk",
  "cd8_l2_unc_"
)

In [None]:
# Observed `population2` labels that are NOT part of the plotting order.
# Labels are the first 11 characters of `dataset` (9 for the "_nk_" entries),
# so the Th1/Th17 label is "cd4_l3_th1t"; the former entry "cd4_l3_th1t7" has
# 12 characters and made that population show up as missing.
setdiff(
  names(table(gseas$population2)),
  c(
    "cd8_l3_prol",
    "cd8_l3_tcm_",
    "cd4_l3_th1t",
    "cd4_l3_temr",
    "cd8_l3_temr",
    "cd4_l3_tfh_",
    "cd8_l3_naiv",
    "cd4_l3_naiv",
    "cd4_l3_th2_",
    "cd4_l3_nfkb",
    "cd4_l2_unc_",
    "cd4_l3_treg",
    "cd4_l3_isag",
    "cd4_l3_prol",
    "cd8_l3_tem_",
    "cd8_l2_nk",
    "cd8_l2_unc_"
  )
)

In [None]:
# Dot plot of the selected pathways: populations on x (fixed order), pathways
# on y, one facet per test type. Colour encodes NES, size encodes -log(padj),
# and alpha is mapped to `padj < 0.05` through a manual 0/1 scale. The four
# full/subcluster datasets are left out. The last plot is saved as SVG.
options(repr.plot.height = 8, repr.plot.width = 24)
gseas %>%
  dplyr::filter(
    pathway %in% selected_pathways,
    !(population2 %in% c("cd8_l1_full", "cd4_l1_full", "cd4_l2_subc", "cd8_l2_subc"))
  ) %>%
  ggplot(aes(
    x = factor(population2, levels = c(
      "cd8_l3_prol",
      "cd8_l3_tcm_",
      "cd4_l3_th1t",
      "cd4_l3_temr",
      "cd8_l3_temr",
      "cd4_l3_tfh_",
      "cd8_l3_naiv",
      "cd4_l3_naiv",
      "cd4_l3_th2_",
      "cd4_l3_nfkb",
      "cd4_l2_unc_",
      "cd4_l3_treg",
      "cd4_l3_isag",
      "cd4_l3_prol",
      "cd8_l3_tem_",
      "cd8_l2_nk",
      "cd8_l2_unc_"
    )),
    y = pathway
  )) +
  geom_point(aes(size = -log(padj), color = NES, alpha = padj < 0.05)) +
  scale_alpha_manual(values = c(0, 1)) +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradient2(low = "blue", mid = "white", high = "red") +
  xlab("") +
  ylab("") +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90),
    axis.ticks.x = element_blank()
  ) +
  ggtheme()
ggsave("../figures/gsea/heatmap_gsea.svg", width = 55, height = 16, units = "cm")

In [None]:
# Interferon and effector signatures across all populations; the y axis follows
# the order of `selected_pathways`.
selected_pathways <- c(
  "HALLMARK_INTERFERON_ALPHA_RESPONSE",
  "HALLMARK_INTERFERON_GAMMA_RESPONSE",
  "GSE13485_DAY7_VS_DAY21_YF17D_VACCINE_PBMC_UP",
  "GOLDRATH_EFF_VS_MEMORY_CD8_TCELL_UP",
  "GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN"
)

options(repr.plot.height = 5, repr.plot.width = 24)
ggplot(
  dplyr::filter(gseas, pathway %in% selected_pathways),
  aes(x = population, y = factor(pathway, levels = selected_pathways))
) +
  geom_point(aes(size = -log(padj), color = NES, alpha = padj < 0.05)) +
  scale_alpha_manual(values = c(0, 1)) +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradient2(low = "blue", mid = "white", high = "red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtheme()

In [None]:
# Activation-related C7 signatures across all populations; the y axis follows
# the order of `selected_pathways`.
selected_pathways <- c(
  "GSE25087_FETAL_VS_ADULT_TCONV_UP",
  "GSE36476_CTRL_VS_TSST_ACT_40H_MEMORY_CD4_TCELL_YOUNG_DN",
  "GSE22886_NAIVE_CD4_TCELL_VS_48H_ACT_TH2_DN",
  "GSE24634_NAIVE_CD4_TCELL_VS_DAY7_IL4_CONV_TREG_UP",
  "GSE17974_0H_VS_24H_IN_VITRO_ACT_CD4_TCELL_UP",
  "GSE36476_CTRL_VS_TSST_ACT_72H_MEMORY_CD4_TCELL_YOUNG_UP"
)

options(repr.plot.height = 5, repr.plot.width = 24)
ggplot(
  dplyr::filter(gseas, pathway %in% selected_pathways),
  aes(x = population, y = factor(pathway, levels = selected_pathways))
) +
  geom_point(aes(size = -log(padj), color = NES, alpha = padj < 0.05)) +
  scale_alpha_manual(values = c(0, 1)) +
  facet_grid(cols = vars(test_type), space = "free", scales = "free") +
  scale_color_gradient2(low = "blue", mid = "white", high = "red") +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 90)) +
  ggtheme()

## GSEA Dia vs. Healthy

### CD4

In [None]:
m_df<- msigdbr(species = "Homo sapiens", category = "C7")

In [None]:
# C7 gene sets as a named list of gene-symbol vectors.
fgsea_sets_hs_c7 <- split(x = m_df$gene_symbol, f = m_df$gs_name)

In [None]:
paths_fc  <- list.files("../tables/fold_change", full.names = T)
dataset_names  <- list.files("../tables/fold_change", full.names = F)

In [None]:
paths_fc

In [None]:
paths_fc[10]

In [None]:
# Ranked gene list (named avg_log2FC vector, decreasing) for the "Dia" cluster
# of the tenth fold-change file.
# NOTE(review): the file is chosen by position in `paths_fc`; the CD8 section
# uses an explicit path — confirm that index 10 is the intended CD4 table.
fc.df <- read_csv(paths_fc[10]) %>%
  as.data.frame() %>%
  dplyr::filter(cluster == "Dia") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)
ranks_cd4_t0 <- deframe(fc.df)

In [None]:
ranks_cd4_t0  

In [None]:
fgseaRes <- fgsea(pathways = fgsea_sets_hs_c7, 
                 stats = ranks_cd4_t0)

In [None]:
fgseaRes <- fgsea(pathways = fgsea_sets_hs_c7, 
                 stats = ranks_cd4_t0)

fgseaRes  %>% dplyr::filter(grepl(pathway, pattern = "NAIVE_VS_EFF_MEMORY"))

In [None]:
fgseaRes  %>% arrange(pval)

In [None]:
# CD4 enrichment curve for the naive-vs-effector-memory "DN" signature, with a
# fixed y range and saved as SVG.
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(
  fgsea_sets_hs_c7[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN"]],
  ranks_cd4_t0
) +
  labs(title = "GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN") +
  ylim(c(-0.9, 0.4))
ggsave(
  filename = "../figures/gsea/GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN.svg",
  width = 9.5, height = 8, units = "cm"
)


In [None]:
# CD4 enrichment curve for the naive-vs-effector-memory "UP" signature, with a
# fixed y range and saved as SVG.
plotEnrichment(
  fgsea_sets_hs_c7[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP"]],
  ranks_cd4_t0
) +
  labs(title = "GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP") +
  ylim(c(-0.9, 0.4))

ggsave(
  filename = "../figures/gsea/GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP.svg",
  width = 9.5, height = 8, units = "cm"
)


In [None]:
# C2 collection as a named list of gene-symbol vectors (replaces `fgsea_sets`).
m_df <- msigdbr(species = "Homo sapiens", category = "C2")
fgsea_sets <- split(x = m_df$gene_symbol, f = m_df$gs_name)

In [None]:
# CD4 enrichment curve for Reactome interferon alpha/beta signaling, saved as SVG.
plotEnrichment(
  fgsea_sets[["REACTOME_INTERFERON_ALPHA_BETA_SIGNALING"]],
  ranks_cd4_t0
) +
  labs(title = "REACTOME_INTERFERON_ALPHA_BETA_SIGNALING") +
  ylim(c(-0.9, 0.4))
ggsave(
  filename = "../figures/gsea/REACTOME_INTERFERON_ALPHA_BETA_SIGNALING_CD4.svg",
  width = 9.5, height = 8, units = "cm"
)

### CD8

In [None]:
grep(paths_fc, pattern = "cd8_l1_full_filt", value = T)

In [None]:
# Ranked gene list (named avg_log2FC vector, decreasing) for the "Dia" cluster
# of the full CD8 dataset.
fc.df <- read_csv("../tables/fold_change/cd8_l1_full_filt.csv") %>%
  as.data.frame() %>%
  dplyr::filter(cluster == "Dia") %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)
ranks_cd8_t0 <- deframe(fc.df)

ranks_cd8_t0

In [None]:
fgseaRes <- fgsea(pathways = fgsea_sets_hs_c7, 
                 stats = ranks_cd8_t0)

In [None]:
fgseaRes  %>% dplyr::filter(grepl(pathway, pattern = "GOLDRATH"))

In [None]:
# CD8 enrichment curve for the Goldrath naive-vs-effector "DN" signature, with
# a fixed y range and saved as SVG.
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(
  fgsea_sets_hs_c7[["GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN"]],
  ranks_cd8_t0
) +
  labs(title = "GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN") +
  ylim(c(-0.7, 0.4))

ggsave(
  filename = "../figures/gsea/GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN.svg",
  width = 9.5, height = 8, units = "cm"
)


In [None]:
# CD8 enrichment curve for the Goldrath naive-vs-effector "UP" signature, with
# a fixed y range and saved as SVG.
plotEnrichment(
  fgsea_sets_hs_c7[["GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_UP"]],
  ranks_cd8_t0
) +
  labs(title = "GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_UP") +
  ylim(c(-0.7, 0.4))

ggsave(
  filename = "../figures/gsea/GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_UP.svg",
  width = 9.5, height = 8, units = "cm"
)


In [None]:
# C2 collection as a named list of gene-symbol vectors (replaces `fgsea_sets`).
m_df <- msigdbr(species = "Homo sapiens", category = "C2")
fgsea_sets <- split(x = m_df$gene_symbol, f = m_df$gs_name)

In [None]:
# CD8 enrichment curve for Reactome interferon alpha/beta signaling, saved as SVG.
plotEnrichment(
  fgsea_sets[["REACTOME_INTERFERON_ALPHA_BETA_SIGNALING"]],
  ranks_cd8_t0
) +
  labs(title = "REACTOME_INTERFERON_ALPHA_BETA_SIGNALING") +
  ylim(c(-0.7, 0.4))

ggsave(
  filename = "../figures/gsea/REACTOME_INTERFERON_ALPHA_BETA_SIGNALING_CD8.svg",
  width = 9.5, height = 8, units = "cm"
)


# DE genes visualization 3 VOLCANO

In [None]:
library(tidytext)

In [None]:
all_markers$dataset  %>% table

In [None]:
# Ten markers with the largest absolute log2 fold change per direction for the
# proliferating CD4 cluster in the "Dia T0 vs Ctrl T0" test (scRNA-seq only).
all_markers %>%
  dplyr::filter(
    source == "scRNAseq_RNA",
    test_type == "Dia T0 vs Ctrl T0",
    dataset == "cd4_l3_proliferating"
  ) %>%
  mutate(abs_avg_log2fc = abs(avg_log2FC)) %>%
  group_by(direction) %>%
  slice_max(n = 10, order_by = abs_avg_log2fc)

In [None]:
# Number of scRNA-seq marker rows per test type (rows) and dataset (columns).
test <- all_markers %>%
  dplyr::filter(source == "scRNAseq_RNA") %>%
  group_by(test_type, dataset) %>%
  tally() %>%
  pivot_wider(names_from = dataset, values_from = n)

In [None]:
write.csv(test, "../tables/fc_test_counts.csv")

In [None]:
# Top 50 markers (by absolute log2 fold change) per test type and direction,
# drawn as a dataset x gene dot grid with one facet row per test type.
options(repr.plot.width = 24, repr.plot.height = 68)
all_markers %>%
  dplyr::filter(source == "scRNAseq_RNA") %>%
  mutate(abs_avg_log2fc = abs(avg_log2FC)) %>%
  group_by(test_type, direction) %>%
  slice_max(n = 50, order_by = abs_avg_log2fc) %>%
  ungroup() %>%
  ggplot(aes(x = dataset, y = reorder_within(gene, avg_log2FC, test_type))) +
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = direction)) +
  facet_grid(
    rows = vars(test_type), cols = vars(dataset),
    scales = "free", space = "free"
  ) +
  # reorder_within() is applied to the y aesthetic, so the matching scale is
  # scale_y_reordered(); with scale_x_reordered() the gene labels kept their
  # "___<test_type>" suffix.
  scale_y_reordered() +
  theme_bw() +
  scale_size_continuous(range = c(1, 4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
list.files("../tables/fold_change/")

In [None]:
paths_fc  <- list.files("../tables/fold_change", full.names = T)
dataset_names  <- list.files("../tables/fold_change", full.names = F)


In [None]:
#' Read one fold-change table and tag it with its dataset name.
#'
#' @param j Index into the global vectors `paths_fc` and `dataset_names`.
#' @return A data.frame sorted by decreasing `avg_log2FC`, with an added
#'   `dataset` column holding the file name without its `.csv` extension.
read_fc <- function(j) {
  fc.df <- read_csv(paths_fc[j]) %>%
    as.data.frame() %>%
    arrange(desc(avg_log2FC))
  # Remove only a literal trailing ".csv"; the former unanchored pattern ".csv"
  # treated "." as a wildcard and could strip text inside the name.
  fc.df$dataset <- sub("\\.csv$", "", dataset_names[j])
  fc.df
}

In [None]:
paths_fc

In [None]:
# seq_along() is empty for an empty `paths_fc`; 1:length() would yield c(1, 0).
all_fcs <- map(seq_along(paths_fc), .f = read_fc)

In [None]:
all_fcs2  <- bind_rows(all_fcs)

In [None]:
all_fcs2  %>% head

In [None]:
all_fcs2$`...1`  <- NULL

In [None]:
all_fcs2$dataset   <- gsub(all_fcs2$dataset, pattern = "full_filt", replacement = "full")

In [None]:
all_fcs2  %>% dplyr::filter(grepl(dataset, pattern = "cd4_l1_full"))  %>% 
group_by(dataset) %>% tally

In [None]:
all_fcs3

In [None]:
# Number of rows per comparison (rows) and dataset (columns), written to CSV.
# NOTE(review): in the part of the notebook visible here `all_fcs3` is first
# assigned further down, so this cell depends on execution order — confirm.
test2 <- all_fcs3 %>%
  group_by(Comparison, Dataset) %>%
  tally() %>%
  pivot_wider(names_from = Dataset, values_from = n)

write.csv(test2, "../tables/fc_test_count2.csv")

In [None]:
all_fcs2$dataset   <- gsub(all_fcs2$dataset, pattern = "_Ctrl_vs_DiaT0", replacement = "--DiaT0 vs. Ctrl")

In [None]:
all_fcs2  %>% dplyr::filter(grepl(dataset, pattern = "cd4_l1_full"))  %>% 
group_by(dataset) %>% tally

In [None]:
gsub(grep(all_fcs2$dataset,pattern = "0_T1_vs_PR_1_T1", value = T)  %>% head, 
     pattern = "_PR_0_T1_vs_PR_1_T1", replacement = "--PR0_T1 vs. PR1_T1")

In [None]:
# Replace each comparison suffix of `dataset` with "--<readable label>" so the
# column can later be split on "--". Replacements run in the listed order.
all_fcs2 <- all_fcs2 %>%
  mutate(
    dataset = gsub("_Ctrl_vs_DiaT1", "--DiaT1 vs. Ctrl", dataset),
    dataset = gsub("_ketoacidosis", "--Keto+ vs. Keto-", dataset),
    dataset = gsub("_PR_0_T0_vs_PR_0_T1", "--PR0_T0 vs. PR0_T1", dataset),
    dataset = gsub("_PR_0_T0_vs_PR_1_T0", "--PR0_T0 vs. PR1_T0", dataset),
    dataset = gsub("_PR_1_T0_vs_PR_1_T1", "--PR1_T0 vs. PR1_T1", dataset),
    dataset = gsub("_Ctrl_vs_DiaT0", "--DiaT0 vs. Ctrl", dataset),
    dataset = gsub("_DiaT0_vs_DiaT1", "--DiaT0 vs. DiaT1", dataset)
  )

In [None]:
all_fcs2$dataset   <- gsub(all_fcs2$dataset, pattern = "_PR_0_T1_vs_PR_1_T1", replacement = "--PR0_T1 vs. PR1_T1")

In [None]:
all_fcs2  %>% dplyr::filter(grepl(dataset, pattern = "cd4_l1_full"))  %>% 
group_by(dataset) %>% tally

In [None]:
all_fcs2  <- all_fcs2  %>% separate(dataset, into = c("Dataset", "Comparison"), sep = "--", remove = F)

In [None]:
all_fcs2  <- all_fcs2  %>% dplyr::filter(!is.na(Comparison))

In [None]:
dataset_comparison  <- levels(factor(all_fcs2$dataset))

In [None]:
all_fcs2$Comparison  %>% table

In [None]:
all_fcs2$dataset  %>% table

In [None]:
all_fcs2$Comparison  %>% table

In [None]:
# Add `direction` ("Up"/"Down") from the pair (Comparison, cluster) using a
# lookup table keyed by "<Comparison>|<cluster>". Pairs that are not listed
# get NA.
all_fcs2 <- all_fcs2 %>%
  mutate(
    direction = unname(c(
      "DiaT1 vs. Ctrl|Ctrl T0" = "Down",
      "DiaT1 vs. Ctrl|Dia T1" = "Up",
      "DiaT0 vs. Ctrl|Ctrl T0" = "Down",
      "DiaT0 vs. Ctrl|Dia T0" = "Up",
      "DiaT0 vs. DiaT1|Dia T0" = "Up",
      "DiaT0 vs. DiaT1|Dia T1" = "Down",
      "PR0_T0 vs. PR0_T1|PR_0 T0" = "Up",
      "PR0_T0 vs. PR0_T1|PR_0 T1" = "Down",
      "PR0_T0 vs. PR1_T0|PR_0 T0" = "Up",
      "PR0_T0 vs. PR1_T0|PR_1 T0" = "Down",
      "PR0_T1 vs. PR1_T1|PR_0 T1" = "Up",
      "PR0_T1 vs. PR1_T1|PR_1 T1" = "Down",
      "PR1_T0 vs. PR1_T1|PR_1 T0" = "Up",
      "PR1_T0 vs. PR1_T1|PR_1 T1" = "Down",
      "Keto+ vs. Keto-|Keto_0 T0" = "Down",
      "Keto+ vs. Keto-|Keto_1 T0" = "Up"
    )[paste(Comparison, cluster, sep = "|")])
  )

In [None]:
gc()

In [None]:
head(all_fcs2)

In [None]:
dataset_list  <- levels(factor(all_fcs2$dataset))


In [None]:
dataset_list

In [None]:
i = dataset_list[1]

In [None]:
all_fcs2  %>% dplyr::filter(dataset == i & p_val_adj < 0.01)  %>% 
    group_by(direction)  

In [None]:
Comparison_list  <- levels(factor(all_fcs2$Comparison))

In [None]:
i = Comparison_list[1]

In [None]:
genes  <- all_fcs2  %>% dplyr::filter(Comparison == i & p_val_adj < 0.01)  %>% 
    group_by(direction, Dataset)  %>% slice_max(n = 10, order_by = avg_log2FC)   %>% pull(gene)

In [None]:
genes

In [None]:
all_fcs2$signif = ifelse(all_fcs2$p_val_adj < 0.05, "y", "n")

In [None]:
all_fcs2  <- all_fcs2  %>% mutate(p_val_adj = if_else(p_val_adj == 0, 1e-294,p_val_adj))

In [None]:
library(tidytext)

In [None]:
Datasets_list  <- levels(factor(all_fcs2$Dataset))

In [None]:
i = Datasets_list[1]

In [None]:
all_fcs2  %>% head

In [None]:
 # Top 10 genes per direction (by avg_log2FC) for dataset `i` in
 # "DiaT0 vs. Ctrl", then the "Up" rows of those genes.
 genes <- all_fcs2 %>%
   dplyr::filter(Dataset == i, Comparison == "DiaT0 vs. Ctrl") %>%
   group_by(direction) %>%
   slice_max(n = 10, order_by = avg_log2FC) %>%
   pull(gene)
 df <- dplyr::filter(
   all_fcs2,
   Dataset == i,
   direction == "Up",
   gene %in% genes,
   Comparison == "DiaT0 vs. Ctrl"
 )

In [None]:
genes

In [None]:
df

In [None]:
j = 0


In [None]:
plot_list[[i]]

In [None]:
plot_list

In [None]:
Comaprisons_list  <- levels(factor(all_fcs2$Comparison  ))

In [None]:
# Create the PDF output directory together with any missing parent, and stay
# silent when it already exists.
dir.create("../figures/DE_genes/pdf/", showWarnings = FALSE, recursive = TRUE)

In [None]:
# For one comparison (`j`), build one dot plot per dataset showing the "Up"
# rows of the top 10 genes per direction, then write all panels with a bold
# title row to a single PDF.
j <- Comparison_list[2]

plot_list <- list()

for (i in Datasets_list) {
  # Top 10 genes per direction by avg_log2FC within this dataset/comparison
  genes <- all_fcs2 %>%
    dplyr::filter(Dataset == i & Comparison == j) %>%
    group_by(direction) %>%
    slice_max(n = 10, order_by = avg_log2FC) %>%
    pull(gene)
  df <- all_fcs2 %>%
    dplyr::filter(
      Dataset == i & direction == "Up" &
        gene %in% genes & Comparison == j
    )
  # The former scale_size_manual() call was immediately replaced by
  # scale_size_continuous(), and scale_x_reordered() had nothing to act on
  # (no reorder_within() on x), so both were dropped.
  p <- df %>%
    ggplot(aes(x = Dataset, y = reorder(gene, avg_log2FC))) +
    geom_point(aes(size = -log(p_val_adj, base = 10), colour = avg_log2FC < 0)) +
    theme_bw() +
    scale_size_continuous(range = c(1, 4)) +
    theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank()
    ) +
    NoLegend() +
    ylab("") +
    xlab("") +
    ggtitle(i)
  plot_list[[i]] <- p
}

pdf(file = paste0("../figures/DE_genes/pdf/", j, ".pdf"), width = 15, height = 18)
title <- ggdraw() +
  draw_label(
    j,
    fontface = "bold",
    x = 0,
    hjust = 0
  ) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 7)
  )
# Print explicitly so the grid is drawn on the PDF device even when this code
# is not evaluated as a top-level expression (e.g. inside a loop or source()).
print(
  plot_grid(
    title, cowplot::plot_grid(plotlist = plot_list, ncol = 7),
    ncol = 1,
    # rel_heights values control vertical title margins
    rel_heights = c(0.05, 1)
  )
)
dev.off()
    



In [None]:
# Basic volcano plot for comparison `j`, coloured by dataset, with guide lines
# at |log2FC| = 0.6 and padj = 0.05.
ggplot(
  dplyr::filter(all_fcs2, Comparison == j),
  aes(x = avg_log2FC, y = -log10(p_val_adj), color = Dataset)
) +
  geom_point() +
  theme_minimal() +
  geom_vline(xintercept = c(-0.6, 0.6), col = "red") +
  geom_hline(yintercept = -log10(0.05), col = "red")

In [None]:
library(ggrepel)
# plot adding up all layers we have seen so far
       

In [None]:
head(all_fcs2)

In [None]:
gc()

In [None]:
rm(all_fcs)

In [None]:
ls()

In [None]:
# Volcano input: keep rows with positive avg_log2FC, mirror "Down" rows to the
# negative side, and derive label/colour columns.
# The comparison `> 0` now sits inside filter(); it was previously written
# after the closing parenthesis, which filtered on a numeric column and piped
# the constant 0 into mutate().
all_fcs3 <- all_fcs2 %>%
  dplyr::filter(avg_log2FC > 0) %>%
  mutate(avg_log2FC2 = ifelse(direction == "Down", avg_log2FC * -1, avg_log2FC)) %>%
  mutate(abs_avg_log2FC2 = abs(avg_log2FC2)) %>%
  mutate(label = ifelse(abs_avg_log2FC2 > 0.9 & p_val_adj < 0.05, gene, NA_character_)) %>%
  mutate(color = ifelse(abs_avg_log2FC2 > 0.6 & p_val_adj < 0.05, Dataset, "grey"))

In [None]:
all_fcs3  %>% head

In [None]:
# Volcano input without genes whose symbol starts with "MTRN": positive
# avg_log2FC rows only, "Down" rows mirrored to negative values. A gene is
# labelled when |log2FC| > 1 and padj < 0.05, or |log2FC| > 0.6 and
# padj < 10e-100; it is coloured by dataset when |log2FC| > 0.6 and padj < 0.05.
all_fcs3 <- all_fcs2 %>%
  dplyr::filter(avg_log2FC > 0, !grepl(gene, pattern = "^MTRN")) %>%
  mutate(
    avg_log2FC2 = ifelse(direction == "Down", -avg_log2FC, avg_log2FC),
    abs_avg_log2FC2 = abs(avg_log2FC2),
    label = ifelse(
      abs_avg_log2FC2 > 1 & p_val_adj < 0.05,
      gene,
      ifelse(abs_avg_log2FC2 > 0.6 & p_val_adj < 10e-100, gene, NA_character_)
    ),
    color = ifelse(abs_avg_log2FC2 > 0.6 & p_val_adj < 0.05, Dataset, "grey")
  )

In [None]:
all_fcs3  %>% head

In [None]:
# Labelled volcano plot for comparison `j` using the mirrored fold change.
ggplot(
  dplyr::filter(all_fcs3, Comparison == j),
  aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)
) +
  geom_point() +
  theme_minimal() +
  geom_text_repel() +
  geom_vline(xintercept = c(-0.6, 0.6), col = "red") +
  geom_hline(yintercept = -log10(0.05), col = "red")

In [None]:
# Labelled volcano plot for comparison `j` using the mirrored fold change.
ggplot(
  dplyr::filter(all_fcs3, Comparison == j),
  aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)
) +
  geom_point() +
  theme_minimal() +
  geom_text_repel() +
  geom_vline(xintercept = c(-0.6, 0.6), col = "red") +
  geom_hline(yintercept = -log10(0.05), col = "red")

In [None]:
all_fcs3$Dataset  %>% table  %>% length

In [None]:
# Volcano plot for comparison `j` with colour and point shape both mapped to
# the `color` column; 17 hue colours plus a grey for the last level.
options(repr.plot.width = 13, repr.plot.height = 7)
ggplot(
  dplyr::filter(all_fcs3, Comparison == j),
  aes(
    x = avg_log2FC2, y = -log10(p_val_adj),
    color = color, label = label, shape = color
  )
) +
  geom_point() +
  theme_minimal() +
  geom_text_repel() +
  scale_color_manual(values = c(scales::hue_pal()(17), "grey88")) +
  geom_vline(xintercept = c(-0.6, 0.6), col = "red") +
  geom_hline(yintercept = -log10(0.05), col = "red") +
  scale_shape_manual(values = rep(21:25, times = 4))

In [None]:
# Volcano plot for comparison `j` with colour and point shape both mapped to
# the `color` column; 17 hue colours plus a grey for the last level.
options(repr.plot.width = 13, repr.plot.height = 7)
ggplot(
  dplyr::filter(all_fcs3, Comparison == j),
  aes(
    x = avg_log2FC2, y = -log10(p_val_adj),
    color = color, label = label, shape = color
  )
) +
  geom_point() +
  theme_minimal() +
  geom_text_repel() +
  scale_color_manual(values = c(scales::hue_pal()(17), "grey88")) +
  geom_vline(xintercept = c(-0.6, 0.6), col = "red") +
  geom_hline(yintercept = -log10(0.05), col = "red") +
  scale_shape_manual(values = rep(21:25, times = 4))

In [None]:
all_fcs3$cd4_vs_cd8  <- substr(all_fcs3$Dataset,1,3)

In [None]:
j

In [None]:
# Volcano plot for comparison `j`, faceted by the first three characters of the
# dataset name. The palette size follows the number of distinct `color` values
# in the filtered data, with grey appended for the last level.
options(repr.plot.width = 18, repr.plot.height = 6)
df10 <- dplyr::filter(all_fcs3, Comparison == j)
ggplot(
  df10,
  aes(
    x = avg_log2FC2, y = -log10(p_val_adj),
    color = color, label = label, shape = color
  )
) +
  geom_point() +
  theme_minimal() +
  geom_text_repel(size = 6) +
  facet_wrap(~cd4_vs_cd8) +
  scale_color_manual(
    values = c(scales::hue_pal()(nlevels(factor(df10$color)) - 1), "grey88")
  ) +
  geom_vline(xintercept = c(-0.6, 0.6), col = "red") +
  geom_hline(yintercept = -log10(0.05), col = "red") +
  scale_shape_manual(values = rep(21:25, times = 4)) +
  ggtitle(j) +
  ggtheme()

In [None]:
Comparison_list

In [None]:
# Volcano input without genes whose symbol starts with "MTRN": positive
# avg_log2FC rows only, "Down" rows mirrored to negative values. A gene is
# labelled when |log2FC| > 0.9 and padj < 0.05, or |log2FC| > 0.6 and
# padj < 10e-100; it is coloured by dataset when |log2FC| > 0.6 and padj < 0.05.
all_fcs3 <- all_fcs2 %>%
  dplyr::filter(avg_log2FC > 0, !grepl(gene, pattern = "^MTRN")) %>%
  mutate(
    avg_log2FC2 = ifelse(direction == "Down", -avg_log2FC, avg_log2FC),
    abs_avg_log2FC2 = abs(avg_log2FC2),
    label = ifelse(
      abs_avg_log2FC2 > 0.9 & p_val_adj < 0.05,
      gene,
      ifelse(abs_avg_log2FC2 > 0.6 & p_val_adj < 10e-100, gene, NA_character_)
    ),
    color = ifelse(abs_avg_log2FC2 > 0.6 & p_val_adj < 0.05, Dataset, "grey")
  )

In [None]:
all_fcs3$cd4_vs_cd8  <- substr(all_fcs3$Dataset,1,3)

In [None]:
# One faceted volcano plot per comparison; points are rasterised, labels stay
# as text.
options(repr.plot.width = 22, repr.plot.height = 8)

for (j in Comparison_list) {
  df10 <- dplyr::filter(all_fcs3, Comparison == j)
  p <- ggplot(
    df10,
    aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)
  ) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2)) +
    theme_minimal() +
    geom_text_repel(size = 6) +
    facet_wrap(~cd4_vs_cd8) +
    scale_color_manual(
      values = c(scales::hue_pal()(nlevels(factor(df10$color)) - 1), "grey88")
    ) +
    geom_vline(xintercept = c(-0.6, 0.6), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    scale_shape_manual(values = rep(21:25, times = 4)) +
    ggtitle(j) +
    ggtheme()
  print(p)
}

In [None]:
# Print the number of rows per dataset for every comparison.
for (j in Comparison_list) {
  df10 <- dplyr::filter(all_fcs3, Comparison == j)
  print(tally(group_by(df10, Dataset)))
}

### Correct Colors

In [None]:
# NOTE(review): placeholder values; `colors_of_populations` is reassigned with
# the real population colours a few cells below, so this assignment looks like
# a leftover — confirm it can be removed.
colors_of_populations = c("Mazda RX4" = "red", "Mazda RX4 Wag" = "blue", "Datsun 710" = "green")

In [None]:
levels(factor(all_fcs3$Dataset))

In [None]:
# Volcano input with a 0.585 log2FC cut-off, without genes whose symbol starts
# with "MTRN". Rows with positive avg_log2FC are kept and "Down" rows mirrored
# to negative values. A gene is labelled, and coloured by dataset, when
# |log2FC| > 0.585 and padj < 0.05. The former second labelling rule
# (padj < 10e-100 at the same fold-change cut-off) is a subset of this one and
# is therefore folded into it.
all_fcs3 <- all_fcs2 %>%
  dplyr::filter(avg_log2FC > 0, !grepl(gene, pattern = "^MTRN")) %>%
  mutate(
    avg_log2FC2 = ifelse(direction == "Down", -avg_log2FC, avg_log2FC),
    abs_avg_log2FC2 = abs(avg_log2FC2),
    label = ifelse(abs_avg_log2FC2 > 0.585 & p_val_adj < 0.05, gene, NA_character_),
    color = ifelse(abs_avg_log2FC2 > 0.585 & p_val_adj < 0.05, Dataset, "grey"),
    cd4_vs_cd8 = substr(Dataset, 1, 3)
  )

In [None]:
# Named colour vector passed to scale_color_manual() in the volcano loops
# below. Names are `Dataset` labels plus "grey", the value given to points
# that do not pass the fold-change/padj cut-off.
# NOTE(review): 'cd8_l3_naive' and 'cd8_l3_tem' share the colour "#004455ff" —
# confirm this is intended. The full/subcluster pairs also share a colour, but
# those datasets are excluded from the plot in the next loop.
colors_of_populations = c(
    'cd4_l1_full' = "#ffa66aff",
    'cd4_l2_subcluster'  = "#ffa66aff",
    'cd4_l2_unc'  = "#cd3333ff",
    'cd4_l3_isaghi'  = "#a46464ff",
    'cd4_l3_naive'  = "#a58413ff",
    'cd4_l3_nfkb'  = "#a46c43ff",
    'cd4_l3_proliferating'  = "#9b3097ff", 
    'cd4_l3_temra'  = "#7d252aff",
    'cd4_l3_tfh'  = "#8e6a1dff",
    'cd4_l3_th1th17'  = "#a94e4eff",
    'cd4_l3_th2'  = "#b36672ff",
    'cd4_l3_treg'  = "#ba6d8dff",
    'cd8_l1_full'  = "#67934aff",
    'cd8_l2_nk'  = "#105292ff",
    'cd8_l2_subcluster' = "#67934aff", 
    'cd8_l2_unc'  = "#6d00c0ff",
    'cd8_l3_naive'  = "#004455ff",
    'cd8_l3_prolif'  = "#5f711aff",
    'cd8_l3_tcm'  = "#427c39ff",
    'cd8_l3_tem'  = "#004455ff",
    'cd8_l3_temra'  = "#16481fff",
    "grey" = "grey88"
)

In [None]:
# Volcano plot per comparison using the fixed colors_of_populations palette,
# leaving out the five datasets listed in the filter below.
for (j in Comparison_list) {
  df10 <- all_fcs3 %>%
    dplyr::filter(
      Comparison == j,
      !(Dataset %in% c(
        "cd4_l1_full", "cd8_l2_subcluster",
        "cd4_l2_subcluster", "cd8_l1_full",
        "cd8_l2_nk"
      ))
    )

  p <- ggplot(
    df10,
    aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)
  ) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2)) +
    theme_minimal() +
    geom_text_repel(size = 6) +
    facet_wrap(~cd4_vs_cd8) +
    scale_color_manual(values = colors_of_populations) +
    # Red guides at |log2FC| = 0.585 and adjusted p = 0.05.
    geom_vline(xintercept = c(-0.585, 0.585), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    scale_shape_manual(values = rep(21:25, times = 4)) +
    ggtitle(j) +
    ggtheme()

  print(p)
}

In [None]:
# Volcano table from all_fcs2 with a looser |log2FC| cut-off of 0.322:
#   * keep rows with a positive fold change whose gene does not start "MTRN";
#   * avg_log2FC2 is the fold change, sign-flipped for "Down" rows;
#   * label / color are set only where |avg_log2FC2| > 0.322 and
#     p_val_adj < 0.05; other rows get no label and the "grey" colour key;
#   * cd4_vs_cd8 is the first three characters of Dataset.
# The former nested ifelse() re-tested p_val_adj < 10e-100, which is a subset
# of p_val_adj < 0.05 and could never add a label, so it has been removed.
all_fcs4 <- all_fcs2 %>%
  dplyr::filter(avg_log2FC > 0, !grepl("^MTRN", gene)) %>%
  mutate(
    avg_log2FC2 = ifelse(direction == "Down", -avg_log2FC, avg_log2FC),
    abs_avg_log2FC2 = abs(avg_log2FC2),
    label = ifelse(
      abs_avg_log2FC2 > 0.322 & p_val_adj < 0.05, gene, NA_character_
    ),
    color = ifelse(
      abs_avg_log2FC2 > 0.322 & p_val_adj < 0.05, Dataset, "grey"
    ),
    cd4_vs_cd8 = substr(Dataset, 1, 3)
  )

In [None]:
# Volcano plot per comparison for cd4_l1_full, cd8_l2_subcluster and
# cd8_l2_nk only, drawn in a single panel.
options(repr.plot.width = 12, repr.plot.height = 6)

for (j in Comparison_list) {
  df10 <- all_fcs4 %>%
    dplyr::filter(
      Comparison == j,
      Dataset %in% c("cd4_l1_full", "cd8_l2_subcluster", "cd8_l2_nk")
    )

  p <- ggplot(
    df10,
    aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)
  ) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2)) +
    theme_minimal() +
    geom_text_repel(size = 6) +
    scale_color_manual(values = colors_of_populations) +
    # Red guides at |log2FC| = 0.322 and adjusted p = 0.05.
    geom_vline(xintercept = c(-0.322, 0.322), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    scale_shape_manual(values = rep(21:25, times = 4)) +
    ggtitle(j) +
    ggtheme()

  print(p)
}

### Without Y genes

In [None]:
library(biomaRt)
# Open the "ensembl" BioMart with the hsapiens_gene_ensembl dataset selected.
mart <- useMart(
  biomart = "ensembl",
  dataset = "hsapiens_gene_ensembl"
)

In [None]:
# Query chromosome name and HGNC symbol for the entries whose
# chromosome_name filter value is "Y".
results <- getBM(
  attributes = c("chromosome_name", "hgnc_symbol"),
  filters = "chromosome_name",
  values = "Y",
  mart = mart
)

In [None]:
# Display the table returned by the getBM() chromosome-Y query.
results

In [None]:
# Volcano table (|log2FC| cut-off 0.322) with extra gene exclusions:
#   * keep rows with a positive fold change;
#   * drop genes starting with "MTRN", "HLA-", "AP0" or "H1-", and genes
#     containing "orf";
#   * drop genes whose symbol is in results$hgnc_symbol (the chromosome-Y
#     query result);
#   * avg_log2FC2 is the fold change, sign-flipped for "Down" rows;
#   * label / color are set only where |avg_log2FC2| > 0.322 and
#     p_val_adj < 0.05; other rows get no label and the "grey" colour key;
#   * cd4_vs_cd8 is the first three characters of Dataset.
# The former nested ifelse() re-tested p_val_adj < 10e-100, which is a subset
# of p_val_adj < 0.05 and could never add a label, so it has been removed.
all_fcs4 <- all_fcs2 %>%
  dplyr::filter(
    avg_log2FC > 0,
    !grepl("^MTRN", gene),
    !grepl("^HLA-", gene),
    !grepl("orf", gene),
    !grepl("^AP0", gene),
    !grepl("^H1-", gene)
  ) %>%
  dplyr::filter(!(gene %in% results$hgnc_symbol)) %>%
  mutate(
    avg_log2FC2 = ifelse(direction == "Down", -avg_log2FC, avg_log2FC),
    abs_avg_log2FC2 = abs(avg_log2FC2),
    label = ifelse(
      abs_avg_log2FC2 > 0.322 & p_val_adj < 0.05, gene, NA_character_
    ),
    color = ifelse(
      abs_avg_log2FC2 > 0.322 & p_val_adj < 0.05, Dataset, "grey"
    ),
    cd4_vs_cd8 = substr(Dataset, 1, 3)
  )

In [None]:
# Show the rows of the filtered table for the gene ASCL2.
dplyr::filter(all_fcs4, gene == "ASCL2")

In [None]:
# Volcano plot per comparison for cd4_l1_full and cd8_l2_subcluster
# (|log2FC| cut-off 0.322); each plot is printed and saved as SVG.
options(repr.plot.width = 12, repr.plot.height = 6)

# The output folder must exist before ggsave() writes into it; the notebook's
# own dir.create() for this folder only appears in a later cell.
dir.create("../figures/volcano", recursive = TRUE, showWarnings = FALSE)

for (j in Comparison_list) {
  df10 <- all_fcs4 %>%
    dplyr::filter(
      Comparison == j,
      Dataset %in% c("cd4_l1_full", "cd8_l2_subcluster")
    )

  p <- ggplot(
    df10,
    aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)
  ) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2), dpi = 300) +
    theme_minimal() +
    # Red guides at |log2FC| = 0.322 and adjusted p = 0.05.
    geom_vline(xintercept = c(-0.322, 0.322), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    geom_text_repel(size = 3) +
    scale_color_manual(values = colors_of_populations) +
    scale_shape_manual(values = rep(21:25, times = 4)) +
    ggtitle(j) +
    ggtheme()

  print(p)
  # Save this exact plot object instead of relying on last_plot().
  ggsave(
    paste0("../figures/volcano/cd4_and_cd8_", j, ".svg"),
    plot = p, width = 10, height = 6
  )
}

In [None]:
library(ggrepel)

In [None]:
# Volcano table with the loosest |log2FC| cut-off (0.2) and the same gene
# exclusions as the "Without Y genes" table:
#   * keep rows with a positive fold change;
#   * drop genes starting with "MTRN", "HLA-", "AP0" or "H1-", and genes
#     containing "orf";
#   * drop genes whose symbol is in results$hgnc_symbol (the chromosome-Y
#     query result);
#   * avg_log2FC2 is the fold change, sign-flipped for "Down" rows;
#   * label / color are set only where |avg_log2FC2| > 0.2 and
#     p_val_adj < 0.05; other rows get no label and the "grey" colour key;
#   * cd4_vs_cd8 is the first three characters of Dataset.
# The former nested ifelse() re-tested p_val_adj < 10e-100, which is a subset
# of p_val_adj < 0.05 and could never add a label, so it has been removed.
all_fcs5 <- all_fcs2 %>%
  dplyr::filter(
    avg_log2FC > 0,
    !grepl("^MTRN", gene),
    !grepl("^HLA-", gene),
    !grepl("orf", gene),
    !grepl("^AP0", gene),
    !grepl("^H1-", gene)
  ) %>%
  dplyr::filter(!(gene %in% results$hgnc_symbol)) %>%
  mutate(
    avg_log2FC2 = ifelse(direction == "Down", -avg_log2FC, avg_log2FC),
    abs_avg_log2FC2 = abs(avg_log2FC2),
    label = ifelse(
      abs_avg_log2FC2 > 0.2 & p_val_adj < 0.05, gene, NA_character_
    ),
    color = ifelse(
      abs_avg_log2FC2 > 0.2 & p_val_adj < 0.05, Dataset, "grey"
    ),
    cd4_vs_cd8 = substr(Dataset, 1, 3)
  )

In [None]:
# Volcano plot per comparison for the cd4_l3_treg dataset (|log2FC| cut-off
# 0.2); each plot is printed and saved as SVG.
options(repr.plot.width = 12, repr.plot.height = 6)

# The output folder must exist before ggsave() writes into it; the notebook's
# own dir.create() for this folder only appears in a later cell.
dir.create("../figures/volcano", recursive = TRUE, showWarnings = FALSE)

for (j in Comparison_list) {
  df10 <- all_fcs5 %>%
    dplyr::filter(Comparison == j, Dataset %in% c("cd4_l3_treg"))

  p <- ggplot(
    df10,
    aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)
  ) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2), dpi = 300) +
    theme_minimal() +
    # Red guides at |log2FC| = 0.2 and adjusted p = 0.05.
    geom_vline(xintercept = c(-0.2, 0.2), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    geom_text_repel(size = 6, max.overlaps = 15) +
    # Named values: unnamed ones are matched by level order, so a comparison
    # with no significant gene would draw the "grey" points in the highlight
    # colour.
    scale_color_manual(
      values = c("cd4_l3_treg" = "#ff5599ff", "grey" = "grey88")
    ) +
    scale_shape_manual(values = rep(21:25, times = 4)) +
    ggtitle(j) +
    ggtheme()

  print(p)
  # Save this exact plot object instead of relying on last_plot().
  ggsave(
    paste0("../figures/volcano/treg_", j, ".svg"),
    plot = p, width = 10, height = 6
  )
}

In [None]:
# Volcano plot per comparison for the cd8_l2_unc dataset (|log2FC| cut-off
# 0.2); each plot is printed and saved as SVG.

# The output folder must exist before ggsave() writes into it; the notebook's
# own dir.create() for this folder only appears in a later cell.
dir.create("../figures/volcano", recursive = TRUE, showWarnings = FALSE)

for (j in Comparison_list) {
  df10 <- all_fcs5 %>%
    dplyr::filter(Comparison == j, Dataset %in% c("cd8_l2_unc"))

  p <- ggplot(
    df10,
    aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)
  ) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2), dpi = 300) +
    theme_minimal() +
    # Red guides at |log2FC| = 0.2 and adjusted p = 0.05.
    geom_vline(xintercept = c(-0.2, 0.2), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    geom_text_repel(size = 6) +
    # Named values: unnamed ones are matched by level order, so a comparison
    # with no significant gene would draw the "grey" points in the highlight
    # colour.
    scale_color_manual(
      values = c("cd8_l2_unc" = "#306aa3ff", "grey" = "grey88")
    ) +
    scale_shape_manual(values = rep(21:25, times = 4)) +
    ggtitle(j) +
    ggtheme()

  print(p)
  # Save this exact plot object instead of relying on last_plot().
  ggsave(
    paste0("../figures/volcano/unc_", j, ".svg"),
    plot = p, width = 10, height = 6
  )
}

In [None]:
# Load the filtered CD4 L1 object from disk (presumably a Seurat object, as
# it is passed to subset() and AggregateExpression() below).
cd4 <- readRDS("../data/processed/L1/cd4_l1_full_filt.rds")

# Aggregate RNA-assay expression per Patient_Time group, using only the part
# of cd4 whose Experiment_ID is Exp16, Exp18, Exp19 or Exp20.
# NOTE(review): with return.seurat = FALSE the result is not a Seurat object;
# confirm that the NormalizeData() / ScaleData() calls applied to `avgexp`
# in the next cell accept this return type.
avgexp <- AggregateExpression(
  subset(cd4, Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")),
  return.seurat = FALSE,
  group.by = "Patient_Time",
  assay = "RNA"
)

In [None]:
# Normalise the aggregated expression, then scale it, both with defaults.
avgexp <- avgexp %>%
  NormalizeData() %>%
  ScaleData()

In [None]:
# Make sure the volcano figure folder exists. recursive = TRUE also creates a
# missing "../figures" parent, and showWarnings = FALSE keeps re-runs quiet
# when the folder is already there.
dir.create("../figures/volcano", recursive = TRUE, showWarnings = FALSE)

In [None]:
# Volcano plot per comparison (|log2FC| cut-off 0.322), one facet per
# cd4_vs_cd8 value, leaving out the five datasets listed in the filter below.
# Each plot is printed and saved as SVG.
options(repr.plot.width = 22, repr.plot.height = 8)

for (j in Comparison_list) {
  df10 <- all_fcs4 %>%
    dplyr::filter(
      Comparison == j,
      !(Dataset %in% c(
        "cd4_l1_full", "cd8_l2_subcluster",
        "cd4_l2_subcluster", "cd8_l1_full",
        "cd8_l2_nk"
      ))
    )

  p <- ggplot(
    df10,
    aes(x = avg_log2FC2, y = -log10(p_val_adj), color = color, label = label)
  ) +
    ggrastr::rasterize(geom_point(aes(shape = color), size = 2)) +
    theme_minimal() +
    geom_text_repel(size = 6) +
    facet_wrap(~cd4_vs_cd8) +
    scale_color_manual(values = colors_of_populations) +
    # Red guides at |log2FC| = 0.322 and adjusted p = 0.05.
    geom_vline(xintercept = c(-0.322, 0.322), col = "red") +
    geom_hline(yintercept = -log10(0.05), col = "red") +
    scale_shape_manual(values = rep(21:25, times = 4)) +
    ggtitle(j) +
    ggtheme()

  print(p)
  ggsave(
    paste0("../figures/volcano/all_populations_", j, ".svg"),
    width = 17, height = 6
  )
}