# Part 7: Analysis of CD8 T cell subsets

In this part, we focus on the subpopulations of CD8+ T cells. We analyze the frequencies of these populations and calculate differentially expressed genes between T1D and healthy donors.


In [None]:
source("diabetes_analysis_v07.R")

In [None]:
# Append a ranking `score` column to a marker table.
#
# score = -log(p_val_adj) * avg_log2FC * (pct.1 / pct.2). The tiny offsets
# (10^-310 and 10^-300) keep log() and the division finite when p_val_adj or
# pct.2 is exactly zero.
#
# df: data frame with columns p_val_adj, avg_log2FC, pct.1 and pct.2
#     (presumably Seurat FindMarkers() output -- confirm with callers).
# Returns `df` with the additional `score` column.
rank_score_func <- function(df) {
  df %>%
    mutate(
      score = -1 * log(p_val_adj + (10^-310)) * avg_log2FC *
        (pct.1 / (pct.2 + 10^-300))
    )
}

# Analysis of subsets

We will load the preprocessed datasets of CD8 T cells. 

In [None]:
# Preprocessed level-3 (L3) CD8 subset objects.
cd8_l3_tem <- readRDS(file = "../data/processed/L3/cd8_l3_tem.rds")
cd8_l3_naive <- readRDS(file = "../data/processed/L3/cd8_l3_naive.rds")
cd8_l3_tcm <- readRDS(file = "../data/processed/L3/cd8_l3_tcm.rds")
cd8_l3_temra <- readRDS(file = "../data/processed/L3/cd8_l3_temra.rds")

In [None]:
cd8_l3_prolif <- readRDS(file = "../data/processed/L3/cd8_l3_prolif.rds")

In [None]:
# Level-2 (L2) objects.
cd8_l2_subcluster <- readRDS(file = "../data/processed/L2/cd8_l2_subcluster.rds")

In [None]:
cd8_l2_nk <- readRDS(file = "../data/processed/L2/cd8_l2_nk.rds")
cd8_l2_unc <- readRDS(file = "../data/processed/L2/cd8_l2_unc.rds")

In [None]:
# Full level-1 (L1) object.
# NOTE(review): this path points into a different project directory
# (240218_VN_Diabetes_V05), while the reload further down in this notebook
# reads "../data/processed/L1/cd8_l1_full_filt.rds" -- confirm which file is
# the intended one.
cd8_l1_full_filt <- readRDS(file = "../../240218_VN_Diabetes_V05/data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# Inspect the per-cell metadata of the full L1 object.
cd8_l1_full_filt@meta.data

In [None]:
# Embedding coloured by the L2 annotation.
DimPlot(object = cd8_l1_full_filt, group.by = "annotations_l2")

In [None]:
# Collect every loaded object in one list, named after its variable.
cd8_l3_list <- list(
  cd8_l3_tem = cd8_l3_tem,
  cd8_l3_naive = cd8_l3_naive,
  cd8_l3_tcm = cd8_l3_tcm,
  cd8_l3_temra = cd8_l3_temra,
  cd8_l3_prolif = cd8_l3_prolif,
  cd8_l2_nk = cd8_l2_nk,
  cd8_l2_unc = cd8_l2_unc,
  cd8_l2_subcluster = cd8_l2_subcluster,
  cd8_l1_full_filt = cd8_l1_full_filt
)

# Plots of Unconventional populations

We will create some additional analyses of unconventional populations. 

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

# Embedding with eight colours supplied for the annotations_l2 groups: one
# blue, five greys and the last two hues of an 8-colour hue palette.
# NOTE(review): colours are given by position, so the highlighted groups
# depend on the order of the annotations_l2 levels -- verify after any
# re-annotation.
DimPlot(
  cd8_l1_full_filt,
  raster = TRUE,
  pt.size = 2,
  group.by = "annotations_l2",
  cols = c(
    "dodgerblue3",
    rep("grey88", 5),
    scales::hue_pal()(8)[c(7, 8)]
  )
) + NoLegend()

# The target directory is not created anywhere else in this notebook, so make
# sure it exists before saving (no-op when it is already there).
dir.create("../figures/Dimplots_unconventional", showWarnings = FALSE, recursive = TRUE)

# ggsave() without `plot` writes the most recently built ggplot, i.e. the
# DimPlot above.
ggsave(filename = "../figures/Dimplots_unconventional/unconventional_cd8.png", width = 12, height = 12, units = "cm")
ggsave(filename = "../figures/Dimplots_unconventional/unconventional_cd8.svg", width = 12, height = 12, units = "cm")

In [None]:
# Number of cells per L2 annotation.
cd8_l1_full_filt$annotations_l2 %>% table()

Let's calculate and plot the markers of unconventional cells. 

In [None]:
# Make the L2 annotation the active identity so FindMarkers() can address the
# populations by name.
Idents(cd8_l1_full_filt) <- cd8_l1_full_filt$annotations_l2

# Markers of each population against all remaining cells (no ident.2 given).
mrk1 <- FindMarkers(cd8_l1_full_filt, ident.1 = "CD8 Unconventional T cells---gd T cells")
mrk2 <- FindMarkers(cd8_l1_full_filt, ident.1 = "CD8 Unconventional T cells---MAIT cells")
mrk3 <- FindMarkers(cd8_l1_full_filt, ident.1 = "CD8 NK cells---NK cells")


In [None]:
# Hand-picked genes for the heatmap; rev() flips the listed order.
markers <- rev(c(
  "CD3D", "TRAC", "TRGC1", "TRDC", "NKG7", "CXCR6",
  "LTB", "FCER1G", "IL7R",
  "PTGDS", "CCL4", "GNLY", "KLRB1",
  "CD8A", "CD8B", "NCR1", "ZBTB16",
  "GZMK", "GZMB",
  "KLRG1",
  "TNF", "CX3CR1"
))

In [None]:
# Average RNA expression of the selected genes per L2 annotation.
avgexp <- AverageExpression(
  cd8_l1_full_filt,
  features = markers,
  return.seurat = FALSE,
  group.by = "annotations_l2",
  assay = "RNA"
)

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 3.5)

# On-screen heatmap: populations in rows (re-ordered by position), genes in
# columns, each gene scaled across populations.
# NOTE(review): the row order c(1,7,8,6,5,4,2,3) is positional and assumes
# eight groups in a fixed order -- verify if the annotations change.
pheatmap(
  t(avgexp$RNA)[c(1, 7, 8, 6, 5, 4, 2, 3), ],
  main = "",
  scale = "column",
  cluster_cols = TRUE,
  cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 12
)

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 3.5)

# The heatmap is written straight to `filename`, so the directory has to exist
# first; elsewhere in this notebook it is only created in a later cell.
dir.create("../figures/heatmaps", showWarnings = FALSE, recursive = TRUE)

# Same heatmap, written to PDF.
pheatmap(
  t(avgexp$RNA)[c(1, 7, 8, 6, 5, 4, 2, 3), ],
  main = "",
  scale = "column",
  cluster_cols = TRUE,
  cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  filename = "../figures/heatmaps/heatmap_cd8_unconventional.pdf",
  width = 8,
  height = 3,
  fontsize = 12
)
         

Next, we will plot the CollecTRI transcription factors. 

In [None]:
# Work on the CollecTRI assay from here on.
DefaultAssay(cd8_l1_full_filt) <- "CollecTRI"

In [None]:
# Group cells by the manual annotation for the marker search.
Idents(cd8_l1_full_filt) <- cd8_l1_full_filt$annotations_manual

In [None]:
# Positive markers only, for every identity class.
mrk <- FindAllMarkers(cd8_l1_full_filt, only.pos = TRUE)

In [None]:
# Markers reported for the "CD8 T cells" cluster.
filter(mrk, cluster == "CD8 T cells")

In [None]:
# Hand-picked CollecTRI features for the heatmap; rev() flips the listed
# order. "NFIL3" and "FOXA2" appear twice in the list, so unique() is applied
# to guarantee that every feature is requested (and plotted) only once.
markers <- unique(rev(c(
  "SPIC", "ZFPM1", "CEBPZ", "EOMES", "SPI1", "ZNF395",
  "RORC", "MAFB", "CREB3",
  "STAT5A", "NFIL3", "FOXA2",
  "CREB1", "NFKB1", "NFIL3",
  "FOXA2", "NR3C1", "BRD4", "ZBTB17",
  "ETS1", "HIF1A", "NFKB2", "ZBTB16"
)))

In [None]:
# Average CollecTRI values of the selected features per L2 annotation.
avgexp <- AverageExpression(
  cd8_l1_full_filt,
  features = markers,
  return.seurat = FALSE,
  group.by = "annotations_l2",
  assay = "CollecTRI"
)

In [None]:
options(repr.plot.width = 7, repr.plot.height = 3.5)

# On-screen heatmap of the CollecTRI averages: populations in rows
# (re-ordered by position), features in columns, scaled per feature.
# NOTE(review): the row order c(1,7,8,6,5,4,2,3) is positional and assumes
# eight groups in a fixed order -- verify if the annotations change.
pheatmap(
  t(avgexp$CollecTRI)[c(1, 7, 8, 6, 5, 4, 2, 3), ],
  main = "",
  scale = "column",
  cluster_cols = TRUE,
  cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 12
)

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 3.5)

# The heatmap is written straight to `filename`, so the directory has to exist
# first; elsewhere in this notebook it is only created in a later cell.
dir.create("../figures/heatmaps", showWarnings = FALSE, recursive = TRUE)

# Same heatmap, written to PDF.
pheatmap(
  t(avgexp$CollecTRI)[c(1, 7, 8, 6, 5, 4, 2, 3), ],
  main = "",
  scale = "column",
  cluster_cols = TRUE,
  cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  filename = "../figures/heatmaps/heatmap_cd8_unconventional_collectri.pdf",
  width = 8,
  height = 3,
  fontsize = 12
)

## Heatmaps of marker genes

In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)

# Embedding of the L2 subclustering, coloured by the manual annotation.
DimPlot(object = cd8_l2_subcluster, group.by = "annotations_manual")

In [None]:
# Hand-picked genes for the L2 heatmap; rev() flips the listed order.
markers <- rev(c(
  "CCR7", "SELL", "TCF7", "LEF1", "BACH2",
  "XCL1", "KLRC2", "IL2RB", "ZNF683", "ITGAM",
  "CXCR3", "ITGA4", "GZMK", "CD28",
  "CCL5", "EOMES", "ITGB1", "KLRB1", "KLRG1",
  "CCL4", "ZEB2", "PRF1", "TBX21", "TOX", "IFNG",
  "GZMA", "TNF", "GZMB", "CX3CR1", "HLA-DRA",
  "MKI67", "PCNA", "MCM6"
))

In [None]:
# Average RNA expression of the selected genes per manual annotation.
avgexp <- AverageExpression(
  cd8_l2_subcluster,
  features = markers,
  return.seurat = FALSE,
  group.by = "annotations_manual",
  assay = "RNA"
)

In [None]:
avgexp$RNA

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 2.5)

# On-screen heatmap: populations in rows (columns of the average matrix
# re-ordered by position), genes in columns in the listed order (no
# clustering), scaled per gene.
# NOTE(review): c(2,1,4,3,5) is positional and assumes five groups in a fixed
# order -- verify if the annotations change.
pheatmap(
  t(avgexp$RNA[, c(2, 1, 4, 3, 5)]),
  main = "",
  scale = "column",
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 12
)

In [None]:
# Create the output directory (including parents); stay silent when it
# already exists so re-running the notebook does not emit a warning.
dir.create("../figures/heatmaps/", showWarnings = FALSE, recursive = TRUE)

In [None]:
# Same heatmap, written to PDF with a smaller font.
pheatmap(
  t(avgexp$RNA[, c(2, 1, 4, 3, 5)]),
  main = "",
  scale = "column",
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  filename = "../figures/heatmaps/heatmap_cd8_l2.pdf",
  width = 8,
  height = 3,
  fontsize = 9
)

## Heatmap CollecTRI

In [None]:
# Reload the objects from disk. Any in-memory change made to them since they
# were first loaded (default assay, identities, ...) is discarded here.
cd8_l2_subcluster <- readRDS(file = "../data/processed/L2/cd8_l2_subcluster.rds")

In [None]:
cd8_l2_nk <- readRDS(file = "../data/processed/L2/cd8_l2_nk.rds")
cd8_l2_unc <- readRDS(file = "../data/processed/L2/cd8_l2_unc.rds")

In [None]:
cd8_l3_tem <- readRDS(file = "../data/processed/L3/cd8_l3_tem.rds")
cd8_l3_naive <- readRDS(file = "../data/processed/L3/cd8_l3_naive.rds")
cd8_l3_tcm <- readRDS(file = "../data/processed/L3/cd8_l3_tcm.rds")
cd8_l3_temra <- readRDS(file = "../data/processed/L3/cd8_l3_temra.rds")
cd8_l3_prolif <- readRDS(file = "../data/processed/L3/cd8_l3_prolif.rds")

In [None]:
# NOTE(review): the three L2 objects below were already read in the cells just
# above, so these reads are redundant. The L1 object is read from
# "../data/processed/L1/", whereas the first load in this notebook used a
# path inside 240218_VN_Diabetes_V05 -- confirm which file is intended.
cd8_l2_subcluster <- readRDS(file = "../data/processed/L2/cd8_l2_subcluster.rds")
cd8_l2_nk <- readRDS(file = "../data/processed/L2/cd8_l2_nk.rds")
cd8_l2_unc <- readRDS(file = "../data/processed/L2/cd8_l2_unc.rds")
cd8_l1_full_filt <- readRDS(file = "../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# Work on the CollecTRI assay of the L2 subclustering.
DefaultAssay(cd8_l2_subcluster) <- "CollecTRI"

In [None]:
# Group cells by the manual annotation for the marker search.
Idents(cd8_l2_subcluster) <- cd8_l2_subcluster$annotations_manual

In [None]:
# Positive markers only, for every identity class.
coll <- FindAllMarkers(cd8_l2_subcluster, only.pos = TRUE)

In [None]:
# First ten rows per cluster, in the row order FindAllMarkers() returned.
genes <- coll %>%
  group_by(cluster) %>%
  slice_head(n = 10) %>%
  pull(gene)

In [None]:
# The target is a nested path: create missing parent directories as well
# (a non-recursive dir.create() fails with only a warning when "../tables/cd8"
# is absent, and write.csv() would then error). Silent if it already exists.
dir.create("../tables/cd8/collectri_annotations/", showWarnings = FALSE, recursive = TRUE)

In [None]:
write.csv(coll, file = "../tables/cd8/collectri_annotations/cd8_l2_subcluster.csv")

In [None]:
# Averages for the automatically selected features (inspection only; the
# object is overwritten by the curated selection below).
avgexp <- AverageExpression(
  cd8_l2_subcluster,
  features = genes,
  return.seurat = FALSE,
  group.by = "annotations_manual",
  assay = "CollecTRI"
)

avgexp$CollecTRI

In [None]:
# Curated feature list used for the figure; rev() flips the listed order.
genes2 <- rev(c(
  "TCF7", "LEF1", "CTCFL", "KLF2", "NR4A3", "BACH2",
  "CTCF", "NOTCH1", "KLF3", "RBPJ", "SMAD3", "NFAT5", "ID3",
  "STAT6", "FOS", "JUN", "RELB", "IRF6", "CEBPA", "CEBPD",
  "TBX21", "PRDM1", "ZNF395", "ID2", "EOMES", "STAT4",
  "RUNX3", "IRF2", "NFKB", "TP53", "MYC", "E2F4", "ZEB1"
))
avgexp <- AverageExpression(
  cd8_l2_subcluster,
  features = genes2,
  return.seurat = FALSE,
  group.by = "annotations_manual",
  assay = "CollecTRI"
)

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 2.5)

# On-screen heatmap of the curated CollecTRI features: populations in rows
# (re-ordered by position), features in columns in the listed order, scaled
# per feature.
pheatmap(
  t(avgexp$CollecTRI[, c(2, 1, 4, 3, 5)]),
  main = "",
  scale = "column",
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 12
)

In [None]:
# Same heatmap, written to PDF with a smaller font.
pheatmap(
  t(avgexp$CollecTRI[, c(2, 1, 4, 3, 5)]),
  main = "",
  scale = "column",
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  filename = "../figures/heatmaps/heatmap_cd8_l2_collecTRI.pdf",
  width = 8,
  height = 2,
  fontsize = 9
)

# Frequencies L2

In [None]:
# create_df4() is defined outside this notebook (presumably in the sourced
# helper script). The code below relies on its result having the columns
# Experiment_ID, Condition, freq and annotations_manual.
df4 <- create_df4(cd8_l2_subcluster)

In [None]:
df4

In [None]:
# Frequency (as %) of every annotated subset per Condition, one facet per
# subset, restricted to four experiments. The bracket compares the first and
# second group on the x axis.
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  # you can change the x to whatever variable you're interested in
  ggplot(aes(x = Condition, y = freq * 100)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(
    size = 3, cex = 3,
    color = "black", method = "center",
    aes(fill = Condition), shape = 21
  ) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(
    label.x = 1.5,
    size = 7, vjust = -1, label = "p.format", comparisons = list(c(1, 2))
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct annotations (not used by the plot above, which fixes
# ncol = 9).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

In [None]:
# The target directory is not created anywhere else in this notebook.
dir.create("../figures/subset_characterization", showWarnings = FALSE, recursive = TRUE)

# Save p5 explicitly instead of relying on "the last plot", so the right
# figure is written even when cells are run out of order.
ggsave("../figures/subset_characterization/cd8_subsets_in_condition.svg",
  plot = p5,
  width = 25, height = 12, units = "cm"
)

In [None]:
# Same frequency plot with a paired test between the second and third group
# on the x axis; patient "116" is excluded (presumably because it has no
# matching pair -- confirm).
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  dplyr::filter(Patient_ID != "116") %>%
  # A paired test matches observations by their row order within each group,
  # so order the rows by patient to make sure the pairs line up.
  # NOTE(review): this assumes exactly one sample per patient and condition.
  dplyr::arrange(Patient_ID) %>%
  # you can change the x to whatever variable you're interested in
  ggplot(aes(x = Condition, y = freq * 100)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(
    size = 3, cex = 3,
    color = "black", method = "center",
    aes(fill = Condition), shape = 21
  ) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(
    label.x = 1.5,
    size = 7, vjust = -1, label = "p.format", comparisons = list(c(2, 3)), paired = TRUE
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct annotations (not used by the plot above, which fixes
# ncol = 9).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

### Unconventional and L2

In [None]:
# Number of cells per manual annotation.
cd8_l1_full_filt$annotations_manual %>% table()

In [None]:
# Cells labelled "Unconventional T cells" take over their L2 annotation; all
# other cells keep the manual label.
cd8_l1_full_filt$annotations_manual <- ifelse(
  cd8_l1_full_filt$annotations_manual == "Unconventional T cells",
  cd8_l1_full_filt$annotations_l2,
  cd8_l1_full_filt$annotations_manual
)

In [None]:
DimPlot(object = cd8_l1_full_filt, group.by = "annotations_manual")

In [None]:
# All features of the RNA count matrix whose name contains "TRG".
grep(
  pattern = "TRG",
  x = rownames(cd8_l1_full_filt@assays$RNA@counts),
  value = TRUE
)

In [None]:
# Create the output directory if needed; silent when it already exists so
# that re-running any of the cells below does not emit a warning.
dir.create("../figures/Feature_unconventional/", showWarnings = FALSE, recursive = TRUE)
FeaturePlot(
  cd8_l1_full_filt,
  features = c("TRGV9"), min.cutoff = 0, max.cutoff = 1,
  raster = TRUE, raster.dpi = c(900, 900), pt.size = 2
)

In [None]:
# ggsave() without `plot` writes the most recently built ggplot.
ggsave(filename = "../figures/Feature_unconventional/Trgv9_cd8.png", width = 14, height = 12, units = "cm")
ggsave(filename = "../figures/Feature_unconventional/Trgv9_cd8.svg", width = 14, height = 12, units = "cm")

In [None]:
dir.create("../figures/Feature_unconventional/", showWarnings = FALSE, recursive = TRUE)
FeaturePlot(
  cd8_l1_full_filt,
  features = c("TRGC1"), min.cutoff = 0, max.cutoff = 1,
  raster = TRUE, raster.dpi = c(900, 900), pt.size = 2
)

In [None]:
ggsave(filename = "../figures/Feature_unconventional/Trgc1_cd8.png", width = 14, height = 12, units = "cm")
ggsave(filename = "../figures/Feature_unconventional/Trgc1_cd8.svg", width = 14, height = 12, units = "cm")

In [None]:
dir.create("../figures/Feature_unconventional/", showWarnings = FALSE, recursive = TRUE)
FeaturePlot(
  cd8_l1_full_filt,
  features = c("TRDC"), min.cutoff = 0, max.cutoff = 2,
  raster = TRUE, raster.dpi = c(900, 900), pt.size = 2
)
ggsave(filename = "../figures/Feature_unconventional/Trdc_cd8.png", width = 14, height = 12, units = "cm")
ggsave(filename = "../figures/Feature_unconventional/Trdc_cd8.svg", width = 14, height = 12, units = "cm")

In [None]:
# Non-rasterised version for on-screen inspection only (not saved).
FeaturePlot(cd8_l1_full_filt, features = c("TRDC"), min.cutoff = 0, max.cutoff = 2)

In [None]:
# create_df4() is defined outside this notebook (presumably in the sourced
# helper script). The code below relies on its result having the columns
# Experiment_ID, Condition, freq and annotations_manual.
df4 <- create_df4(cd8_l1_full_filt)

In [None]:
df4

In [None]:
# Frequency (as %) of every annotated population per Condition, one facet per
# population, restricted to four experiments. The bracket compares the first
# and second group on the x axis.
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  # you can change the x to whatever variable you're interested in
  ggplot(aes(x = Condition, y = freq * 100)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(
    size = 3, cex = 3,
    color = "black", method = "center",
    aes(fill = Condition), shape = 21
  ) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(
    label.x = 1.5,
    size = 7, vjust = -1, label = "p.format", comparisons = list(c(1, 2))
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct annotations (not used by the plot above, which fixes
# ncol = 9).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

In [None]:
# The target directory is not created anywhere else in this notebook.
dir.create("../figures/subset_characterization", showWarnings = FALSE, recursive = TRUE)

# Save p5 explicitly instead of relying on "the last plot", so the right
# figure is written even when cells are run out of order.
ggsave("../figures/subset_characterization/cd8_unc_subsets_in_condition.svg",
  plot = p5,
  width = 18, height = 12, units = "cm"
)

In [None]:
# Same frequency plot with a paired test between the second and third group
# on the x axis; patient "116" is excluded (presumably because it has no
# matching pair -- confirm).
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  dplyr::filter(Patient_ID != "116") %>%
  # A paired test matches observations by their row order within each group,
  # so order the rows by patient to make sure the pairs line up.
  # NOTE(review): this assumes exactly one sample per patient and condition.
  dplyr::arrange(Patient_ID) %>%
  # you can change the x to whatever variable you're interested in
  ggplot(aes(x = Condition, y = freq * 100)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(
    size = 3, cex = 3,
    color = "black", method = "center",
    aes(fill = Condition), shape = 21
  ) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(
    label.x = 1.5,
    size = 7, vjust = -1, label = "p.format", comparisons = list(c(2, 3)), paired = TRUE
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct annotations (not used by the plot above, which fixes
# ncol = 9).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

# Frequencies and counts of all populations

In [None]:
cd8_l1_full_filt

In [None]:
# Cell counts per sample, annotation and annotation level.
all_counts <- read_csv(file = "../tables/populations_freq/all_levels_counts_cd8.csv")

In [None]:
all_counts$Experiment_ID %>% table()

In [None]:
all_counts

In [None]:
# Share of each population among all cells counted for the same sample at the
# same annotation level. The result stays grouped by Sample_ID and Level
# (there is no ungroup()).
df3 <- mutate(
  group_by(all_counts, Sample_ID, Level),
  freq_from_total = n / sum(n)
)

In [None]:
df3

In [None]:
## Set parent population

In [None]:
# Split the "---"-separated annotation into its first two components (any
# third piece is dropped, the original column is kept) and derive the parent
# population: "CD8" for L1, the L1 label for L2 and "<L1>---<L2>" for L3.
df3 <- df3 %>%
  separate(
    annotations,
    into = c("annot_l1", "annot_l2", NA),
    sep = "---",
    remove = FALSE
  ) %>%
  mutate(
    Parent_annotation = case_when(
      Level == "L1" ~ "CD8",
      Level == "L2" ~ annot_l1,
      Level == "L3" ~ paste0(annot_l1, "---", annot_l2)
    )
  )

In [None]:
df3

In [None]:
## For each level and each patient calculate the total count per parent population

In [None]:
# Candidate parent labels: every L1 annotation can be the parent of L2
# populations, every L2 annotation the parent of L3 populations.
levels_l1 <- all_counts %>%
  filter(Level == "L1") %>%
  pull(annotations) %>%
  unique()
levels_l2 <- all_counts %>%
  filter(Level == "L2") %>%
  pull(annotations) %>%
  unique()

In [None]:

# Per-sample total count of all populations at `child_level` that belong to
# the parent `parent_label`.
#
# Children are recognised by their annotation starting with
# "<parent_label>---". A plain substring/regex match (grepl) would also pick
# up children of any other parent whose label merely contains `parent_label`
# (e.g. a label that is a prefix of a sibling label) and would misread regex
# metacharacters in a label, inflating the parent total.
#
# Returns one row per Sample_ID with columns total_per_patient, Level and
# Parent_annotation (zero rows when the parent has no children).
sum_children_per_sample <- function(parent_label, child_level) {
  child_prefix <- paste0(parent_label, "---")
  df3 %>%
    filter(startsWith(as.character(annotations), child_prefix) & Level == child_level) %>%
    group_by(Sample_ID) %>%
    summarise(total_per_patient = sum(n)) %>%
    mutate(Level = child_level, Parent_annotation = parent_label)
}

# L2 totals per L1 parent first, then L3 totals per L2 parent; bound in one
# step instead of growing a data frame inside a loop.
df_sum_of_parent <- bind_rows(
  lapply(levels_l1, sum_children_per_sample, child_level = "L2"),
  lapply(levels_l2, sum_children_per_sample, child_level = "L3")
)

In [None]:
df_sum_of_parent

In [None]:
dim(df3)

In [None]:
# Attach the per-sample parent totals. No `by` is given, so the join uses
# every column the two tables have in common.
df4 <- left_join(x = df3, y = df_sum_of_parent)

In [None]:
# Should have the same number of rows as df3 (see dim(df3) above).
dim(df4)

In [None]:
df4

In [None]:
# Share of each population within its parent population.
df4$freq_from_parent <- df4$n / df4$total_per_patient

In [None]:
df4

In [None]:
# Percent versions of both frequencies.
df4$pct_from_total <- 100 * df4$freq_from_total
df4$pct_from_parent <- 100 * df4$freq_from_parent


In [None]:
# Below L1, a missing parent-based value is set to 0; L1 rows keep their NA.
df4$freq_from_parent <- ifelse(
  is.na(df4$freq_from_parent) & df4$Level != "L1",
  0,
  df4$freq_from_parent
)
df4$pct_from_parent <- ifelse(
  is.na(df4$pct_from_parent) & df4$Level != "L1",
  0,
  df4$pct_from_parent
)

## Ordered frequency plots

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Box plot with individual points of the percent-from-parent values per
# Condition, one facet per annotation ("---" in the facet label is replaced by
# a line break).
# NOTE(review): geom_dotplot() is drawn with dotsize = 0 and the jitter has
# zero width and height; `binaxis`/`stackdir` look like geom_dotplot arguments
# passed to geom_jitter() -- confirm whether these layers are still needed.
df4 %>%
  ggplot(aes(x = Condition, y = pct_from_parent)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(
    binaxis = "y",
    position = position_jitter(width = 0, height = 0),
    size = 1,
    stackdir = "center",
    aes(color = Condition)
  ) +
  theme_classic() +
  xlab("") +
  ylab("Value") +
  facet_wrap(
    ~factor(annotations, labels = gsub("---", "\n", levels(factor(annotations)))),
    scales = "free",
    ncol = 6
  ) +
  ylim(0, NA) +
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = "top", size = 3, vjust = 0.3, label = "p.format"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 22),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(colour = "black")
  ) +
  ggtitle("CD8 Pct from parent")

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Same layout for the percent-from-total values.
df4 %>%
  ggplot(aes(x = Condition, y = pct_from_total)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(
    binaxis = "y",
    position = position_jitter(width = 0, height = 0),
    size = 1,
    stackdir = "center",
    aes(color = Condition)
  ) +
  theme_classic() +
  xlab("") +
  ylab("Value") +
  facet_wrap(
    ~factor(annotations, labels = gsub("---", "\n", levels(factor(annotations)))),
    scales = "free",
    ncol = 6
  ) +
  ylim(0, NA) +
  ggpubr::stat_compare_means(
    label.x = 1.2, label.y.npc = "top", size = 3, vjust = 0.3, label = "p.format"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5, size = 22),
    axis.line = element_line(colour = "black"),
    axis.ticks = element_line(colour = "black")
  ) +
  ggtitle("CD8 Pct from total")

## Plot of differences in Conditions

In [None]:
# Every distinct annotation is tested; the three conditions are addressed by
# position in the cells below (1 = "Ctrl T0", 2 = "Dia T0", 3 = "Dia T1").
annotations_to_test <- unique(df4$annotations)
comparisons_to_test <- c("Ctrl T0", "Dia T0", "Dia T1")

In [None]:
annotations_to_test

### Freq from total

In [None]:
# Summarise one two-group comparison as a single result row.
#
# two_groups:        data frame with a `Condition` column (two groups) and a
#                    numeric `value` column.
# annot_label:       annotation being tested (stored in `name`).
# comparison_label:  text stored in `comparison`.
# dia_pos, ctrl_pos: positions, within the per-Condition means returned by
#                    group_by()/summarise(), of the means reported as
#                    `mean_dia` and `mean_ctrl`.
#
# Returns a one-row data.frame with the Wilcoxon rank-sum estimate, its
# confidence interval (`lower`, `upper`), the p-value, both group means and
# their ratio.
# NOTE(review): the estimate's sign follows the group order inside
# wilcox.test(), which need not be the order named in `comparison_label` --
# confirm the intended direction for the "... vs Ctrl T0" rows.
summarise_wilcox_pair <- function(two_groups, annot_label, comparison_label,
                                  dia_pos, ctrl_pos) {
  wcx <- wilcox.test(two_groups$value ~ two_groups$Condition, conf.int = TRUE)
  group_means <- two_groups %>%
    group_by(Condition) %>%
    summarise(mean = mean(value), sd = sd(value))
  data.frame(
    name = annot_label,
    comparison = comparison_label,
    estimate = wcx$estimate,
    pval = wcx$p.value,
    mean_dia = group_means$mean[dia_pos],
    mean_ctrl = group_means$mean[ctrl_pos],
    upper = wcx$conf.int[2],
    lower = wcx$conf.int[1]
  ) %>%
    mutate(ratio = (mean_dia) / (mean_ctrl))
}

# Four comparisons per annotation, collected in a pre-allocated list.
n_comparisons <- 4
result_rows <- vector("list", n_comparisons * length(annotations_to_test))

for (i in seq_along(annotations_to_test)) {
  annot_label <- annotations_to_test[i]
  slot <- n_comparisons * (i - 1)

  # pct_from_total values of the current annotation for two conditions.
  pair_values <- function(cond_pair) {
    df4 %>%
      filter(annotations == annot_label & Condition %in% cond_pair) %>%
      dplyr::select(Condition, value = pct_from_total) %>%
      mutate(value = as.numeric(value))
  }

  ## Comparison Dia T0 vs Dia T1
  result_rows[[slot + 1]] <- summarise_wilcox_pair(
    pair_values(c(comparisons_to_test[2], comparisons_to_test[3])),
    annot_label,
    "Dia T0 vs Dia T1",
    dia_pos = 1, ctrl_pos = 2
  )

  ## Comparison of Dia vs controls in both times
  result_rows[[slot + 2]] <- summarise_wilcox_pair(
    pair_values(c(comparisons_to_test[2], comparisons_to_test[1])),
    annot_label,
    paste(comparisons_to_test[2], "vs", comparisons_to_test[1]),
    dia_pos = 2, ctrl_pos = 1
  )
  result_rows[[slot + 3]] <- summarise_wilcox_pair(
    pair_values(c(comparisons_to_test[3], comparisons_to_test[1])),
    annot_label,
    paste(comparisons_to_test[3], "vs", comparisons_to_test[1]),
    dia_pos = 2, ctrl_pos = 1
  )

  ## Comparison of Dia - partial remission vs. no remission
  remission_values <- df4 %>%
    filter(annotations == annot_label & Disease == "Dia" & !is.na(Condition2)) %>%
    dplyr::select(Condition = Condition2, value = pct_from_total) %>%
    mutate(value = as.numeric(value))
  result_rows[[slot + 4]] <- summarise_wilcox_pair(
    remission_values,
    annot_label,
    "PR_0 vs PR_1",
    dia_pos = 1, ctrl_pos = 2
  )
}

# rbind (rather than dplyr::bind_rows) keeps the row names that end up in the
# first column of the exported CSV.
df_final <- do.call(rbind, result_rows)

In [None]:
df_final %>% arrange(pval)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Estimate and confidence interval per annotation, one facet per comparison.
# Colour class: "1" = interval entirely below zero, "2" = interval entirely
# above zero, "3" = interval includes zero.
# The colours are named so each class always gets the same colour; with
# unnamed values ggplot2 assigns colours in order to the classes that are
# present, so a missing class would shift the colours (e.g. "3" drawn in red).
# NOTE(review): the x-axis label says "Probability", but `estimate` is the
# estimate returned by wilcox.test(conf.int = TRUE) -- consider relabelling.
df_final %>%
  mutate(
    color = ifelse(
      estimate < 0 & upper < 0,
      "1",
      ifelse(estimate > 0 & lower > 0, "2", "3")
    )
  ) %>%
  ggplot(aes(estimate, name, color = color)) +
  geom_vline(xintercept = 0, color = "gray75") +
  geom_linerange(
    aes(xmin = lower, xmax = upper),
    size = 1.5,
    alpha = 0.5
  ) +
  geom_point(size = 4) +
  theme_minimal(base_size = 16) +
  scale_color_manual(
    values = c("1" = "green4", "2" = "red3", "3" = "grey"),
    guide = "none"
  ) +
  facet_wrap(~comparison, ncol = 4) +
  labs(
    title = "", y = NULL,
    x = "Probability \n(95% Confidence Intervals)"
  ) +
  theme(
    axis.text.y = element_text(hjust = 0, size = 18),
    panel.grid = element_blank()
  ) +
  ggtitle("Pct from Total")

In [None]:
write.csv(df_final, "../tables/populations_freq/cd8_pct_from_total.csv")

### Freq from parent

In [None]:
# Start again from all distinct annotations and drop the first two entries.
# NOTE(review): the selection is positional -- presumably the first two are
# populations without a meaningful parent frequency; confirm against the
# printed vector below.
annotations_to_test <- unique(df4$annotations)
annotations_to_test <- annotations_to_test[3:length(annotations_to_test)]

In [None]:
annotations_to_test

In [None]:
# Reset the result table before the parent-based tests.
df_final <- NULL

In [None]:
# Parent-based percentages are not used for L1 populations.
df5 <- df4 %>% filter(Level != "L1")

# Summarise one two-group comparison as a single result row.
#
# two_groups:        data frame with a `Condition` column (two groups) and a
#                    numeric `value` column.
# annot_label:       annotation being tested (stored in `name`).
# comparison_label:  text stored in `comparison`.
# dia_pos, ctrl_pos: positions, within the per-Condition means returned by
#                    group_by()/summarise(), of the means reported as
#                    `mean_dia` and `mean_ctrl`.
#
# Returns a one-row data.frame with the Wilcoxon rank-sum estimate, its
# confidence interval (`lower`, `upper`), the p-value, both group means and
# their ratio.
# NOTE(review): the estimate's sign follows the group order inside
# wilcox.test(), which need not be the order named in `comparison_label` --
# confirm the intended direction for the "... vs Ctrl T0" rows.
summarise_wilcox_pair <- function(two_groups, annot_label, comparison_label,
                                  dia_pos, ctrl_pos) {
  wcx <- wilcox.test(two_groups$value ~ two_groups$Condition, conf.int = TRUE)
  group_means <- two_groups %>%
    group_by(Condition) %>%
    summarise(mean = mean(value), sd = sd(value))
  data.frame(
    name = annot_label,
    comparison = comparison_label,
    estimate = wcx$estimate,
    pval = wcx$p.value,
    mean_dia = group_means$mean[dia_pos],
    mean_ctrl = group_means$mean[ctrl_pos],
    upper = wcx$conf.int[2],
    lower = wcx$conf.int[1]
  ) %>%
    mutate(ratio = (mean_dia) / (mean_ctrl))
}

# Testing starts at the fifth entry of annotations_to_test.
# NOTE(review): entries 1-4 are skipped by position -- confirm this is
# intended. Building the index this way also avoids `5:length(x)` counting
# downwards when fewer than five annotations are present.
test_idx <- seq_along(annotations_to_test)
test_idx <- test_idx[test_idx >= 5]

# Four comparisons per annotation, collected in a pre-allocated list. The
# result table is always rebuilt from scratch, so rows from an earlier
# analysis left in `df_final` can no longer leak into this one.
n_comparisons <- 4
result_rows <- vector("list", n_comparisons * length(test_idx))

for (run in seq_along(test_idx)) {
  i <- test_idx[run]
  print(i)
  annot_label <- annotations_to_test[i]
  slot <- n_comparisons * (run - 1)

  # pct_from_parent values of the current annotation for two conditions.
  pair_values <- function(cond_pair) {
    df5 %>%
      filter(annotations == annot_label & Condition %in% cond_pair) %>%
      dplyr::select(Condition, value = pct_from_parent) %>%
      mutate(value = as.numeric(value))
  }

  ## Comparison Dia T0 vs Dia T1
  result_rows[[slot + 1]] <- summarise_wilcox_pair(
    pair_values(c(comparisons_to_test[2], comparisons_to_test[3])),
    annot_label,
    "Dia T0 vs Dia T1",
    dia_pos = 1, ctrl_pos = 2
  )

  ## Comparison of Dia vs controls in both times
  result_rows[[slot + 2]] <- summarise_wilcox_pair(
    pair_values(c(comparisons_to_test[2], comparisons_to_test[1])),
    annot_label,
    paste(comparisons_to_test[2], "vs", comparisons_to_test[1]),
    dia_pos = 2, ctrl_pos = 1
  )
  result_rows[[slot + 3]] <- summarise_wilcox_pair(
    pair_values(c(comparisons_to_test[3], comparisons_to_test[1])),
    annot_label,
    paste(comparisons_to_test[3], "vs", comparisons_to_test[1]),
    dia_pos = 2, ctrl_pos = 1
  )

  ## Comparison of Dia - partial remission vs. no remission
  remission_values <- df5 %>%
    filter(annotations == annot_label & Disease == "Dia" & !is.na(Condition2)) %>%
    dplyr::select(Condition = Condition2, value = pct_from_parent) %>%
    mutate(value = as.numeric(value))
  result_rows[[slot + 4]] <- summarise_wilcox_pair(
    remission_values,
    annot_label,
    "PR_0 vs PR_1",
    dia_pos = 1, ctrl_pos = 2
  )
}

# rbind (rather than dplyr::bind_rows) keeps the row names that end up in the
# first column of the exported CSV.
df_final <- do.call(rbind, result_rows)

df_final %>% arrange(pval)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Forest plot of the Wilcoxon results collected in df_final, one panel per
# comparison. Points are the estimates returned by wilcox.test(conf.int = T),
# horizontal ranges are the confidence intervals. Colour key:
#   "1" = interval entirely below zero, "2" = interval entirely above zero,
#   "3" = interval includes zero.
df_final  %>% 
mutate(color = ifelse(estimate < 0 & upper < 0, 
                      "1", 
                      ifelse(estimate > 0 & lower > 0, "2", "3")))  %>% 
  ggplot(aes(estimate, name, color = color)) +
  geom_vline(xintercept = 0, color = "gray75") +
  geom_linerange(aes(xmin = lower, xmax = upper),
                 size = 1.5,
                 alpha = 0.5) +
  geom_point(size = 4) +
  theme_minimal(base_size = 16) +
  # Values are named so each category always gets the same colour. An unnamed
  # vector is assigned in order to the categories that happen to be present,
  # which shifts the colours whenever one category is absent.
  scale_color_manual(values = c("1" = "green4", "2" = "red3", "3" = "grey"), guide = "none") +
facet_wrap(~comparison, ncol = 4) +
  # The plotted estimate is the location shift reported by wilcox.test(),
  # not a probability.
  labs(title = "", y = NULL,
       x = "Difference in location \n(95% Confidence Intervals)") +
  theme(axis.text.y = element_text(hjust = 0, size = 18),
        panel.grid = element_blank())

write.csv(df_final, "../tables/populations_freq/cd8_pct_from_parent.csv")

# Populations - correlations with C-peptide

In [None]:
# Distinct (Sample_ID, fasting_cpept_T1) pairs taken from the cell-level metadata.
fast  <- cd8_l1_full_filt@meta.data  %>% dplyr::select(Sample_ID, fasting_cpept_T1)  %>% unique

In [None]:
model_table

In [None]:
# Add the fasting C-peptide values from `fast` to model_table.
# NOTE(review): no `by` is given, so the join uses every column name shared by
# model_table and fast - presumably only Sample_ID; confirm.
populations  <- model_table  %>% 
 left_join(fast)

In [None]:
populations

In [None]:
# Correlate one column of the global `populations` table with fasting C-peptide.
#
# Arguments:
#   i        Column position in `populations` of the variable to test.
#   n_tests  Number of tests used for the Bonferroni correction. Defaults to 41,
#            the number of columns (11:51) this function is mapped over.
#
# Returns a one-row data.frame with the tested column name (`population`), the
# correlation estimate from cor.test() (`cor`), its p-value (`pval`) and the
# Bonferroni-adjusted p-value capped at 1 (`padj`).
#
# Column 52 of `populations` is used as the C-peptide variable.
calc_correlation  <- function(i, n_tests = 41L){
    df  <- data.frame(value = populations[[i]],
                      fasting_cpept_T1 = populations[[52]])
    # Keep complete pairs only. The earlier `filter(!is.na(52))` tested the
    # constant 52 and therefore never removed a row.
    df  <- df[!is.na(df$value) & !is.na(df$fasting_cpept_T1), , drop = FALSE]
    # Local name avoids shadowing stats::cor
    ct  <- cor.test(df$value, df$fasting_cpept_T1)
    res_df  <- data.frame(population = colnames(populations)[i], 
                          cor = ct$estimate, 
                          pval = ct$p.value, 
                          padj  = min(1, ct$p.value * n_tests))
    return(res_df)
}

In [None]:
test  <- future_map(11:51, calc_correlation)

In [None]:
test2  <- bind_rows(test)

In [None]:
test2  %>% arrange(pval)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 4)
# Scatter plot with a linear fit for the 10 populations with the smallest
# correlation p-values in test2. x = the population column, y = column 52 of
# `populations` (selected here under the name fasting_cpept_T1).
for(i in (test2  %>% arrange(pval)  %>% pull(population))[1:10]){
    # Look the population up by name, since test2 stores column names
    df2 <- populations  %>% dplyr::select(which(colnames(populations)==i),fasting_cpept_T1 = 52) 
    colnames(df2)  <- c("value", "fasting_cpept_T1")
    p  <- df2 %>%  ggplot(aes(x=value, y=fasting_cpept_T1)) +
  geom_point(shape = 16, size = 2) +
 geom_smooth(method=lm) + ggtitle(i) 
    # Explicit print is needed for plots created inside a loop
    print(p)
    }

In [None]:
# Number of distinct patients per experiment and disease group
cd8_l1_full_filt@meta.data  %>%
    dplyr::distinct(Patient_ID, Experiment_ID, Disease)  %>%
    group_by(Experiment_ID, Disease)  %>%
    tally  %>%
    arrange(Experiment_ID)

# Populations - correlations with age

In [None]:
# Correlate one column of the global `populations` table with age.
#
# Arguments:
#   i        Column position in `populations` of the variable to test.
#   n_tests  Number of tests used for the Bonferroni correction. Defaults to 41,
#            the number of columns (11:51) this function is mapped over.
#
# Returns a one-row data.frame with the tested column name (`population`), the
# correlation estimate from cor.test() (`cor`), its p-value (`pval`) and the
# Bonferroni-adjusted p-value capped at 1 (`padj`).
#
# Column 6 of `populations` is used as the age variable.
calc_correlation  <- function(i, n_tests = 41L){
    df  <- data.frame(value = populations[[i]],
                      age = populations[[6]])
    # Keep complete pairs only. The earlier `filter(!is.na(6))` tested the
    # constant 6 and therefore never removed a row.
    df  <- df[!is.na(df$value) & !is.na(df$age), , drop = FALSE]
    # Local name avoids shadowing stats::cor
    ct  <- cor.test(df$value, df$age)
    res_df  <- data.frame(population = colnames(populations)[i], 
                          cor = ct$estimate, 
                          pval = ct$p.value, 
                          padj  = min(1, ct$p.value * n_tests))
    return(res_df)
}

In [None]:
test  <- future_map(11:51, calc_correlation)

In [None]:
test2  <- bind_rows(test)

In [None]:
test2  %>% arrange(pval)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 4)
# Scatter plot with a linear fit for the 10 populations with the smallest
# correlation p-values in test2. x = the population column, y = column 6 of
# `populations` (selected here under the name age).
for(i in (test2  %>% arrange(pval)  %>% pull(population))[1:10]){
    # Look the population up by name, since test2 stores column names
    df2 <- populations  %>% dplyr::select(which(colnames(populations)==i),age = 6) 
    colnames(df2)  <- c("value", "age")
    p  <- df2 %>%  ggplot(aes(x=value, y=age)) +
  geom_point(shape = 16, size = 2) +
 geom_smooth(method=lm) + ggtitle(i) 
    # Explicit print is needed for plots created inside a loop
    print(p)
    }

# DE genes in CD8 populations

In [None]:
# Named list of all CD8 datasets; each element is named after its object
cd8_l3_list  <- list(
    cd8_l3_tem        = cd8_l3_tem,
    cd8_l3_naive      = cd8_l3_naive,
    cd8_l3_tcm        = cd8_l3_tcm,
    cd8_l3_temra      = cd8_l3_temra,
    cd8_l3_prolif     = cd8_l3_prolif,
    cd8_l2_nk         = cd8_l2_nk,
    cd8_l2_unc        = cd8_l2_unc,
    cd8_l2_subcluster = cd8_l2_subcluster,
    cd8_l1_full_filt  = cd8_l1_full_filt
)

In [None]:
# Per-patient partial-remission label, built from cells whose Condition is
# "Dia T0". One row per (Patient_ID, part_remission_y_n) combination;
# Condition2 is "PR_" followed by the part_remission_y_n value.
md_for_remission  <- cd8_l1_full_filt@meta.data  %>% 
dplyr::filter(Condition %in% c("Dia T0"))  %>% 
group_by(Patient_ID, part_remission_y_n)  %>% 
# tally() is used only to collapse cells to one row per group; n is dropped below
tally %>% mutate(Condition2 = paste0("PR_",part_remission_y_n))  %>% 
dplyr::select(-n, -part_remission_y_n)

In [None]:
# Number of distinct patient/sample combinations per experiment and disease group
cd8_l1_full_filt@meta.data  %>%
    dplyr::distinct(Patient_ID, Disease, Sample_ID, Experiment_ID)  %>%
    group_by(Experiment_ID, Disease)  %>%
    tally()

In [None]:
md_for_remission

In [None]:
# Attach the per-patient remission label to every dataset and combine it with
# the time point, giving Condition2 values of the form "<PR label> <Time>".
for (i in seq_along(cd8_l3_list)) {
    
    # Drop any Condition2 left from an earlier run so the join adds a fresh one
    cd8_l3_list[[i]]$Condition2  <- NULL
    
    joined_md  <- cd8_l3_list[[i]]@meta.data  %>% 
        # Explicit key: do not depend on which column names happen to be shared
        left_join(md_for_remission, by = "Patient_ID")  %>% 
        mutate(Condition2 = paste(Condition2, Time))  %>% 
        # paste() turns a missing label or time into the text "NA"; map those back to NA
        mutate(Condition2 = ifelse(grepl(Condition2, pattern = "NA"), NA_character_,Condition2))
    
    # The join must keep exactly one row per cell; a duplicated Patient_ID in
    # md_for_remission would add rows and misalign the metadata.
    stopifnot(nrow(joined_md) == ncol(cd8_l3_list[[i]]))
    
    # left_join() drops row names; restore the cell names before the metadata
    # is stored so the object is never read with unnamed rows.
    rownames(joined_md)  <- colnames(cd8_l3_list[[i]])
    cd8_l3_list[[i]]@meta.data  <- joined_md
    
    print(cd8_l3_list[[i]]$Condition2  %>% table)
}

### Remission and non-remission in T0 vs T1 and Ketoacidosis

In [None]:
# Per-patient ketoacidosis label, built from cells whose Condition is "Dia T0".
# One row per (Patient_ID, ph_man) combination; ph_man below 7.3 is labelled
# "Keto_1", otherwise "Keto_0" (a missing ph_man gives NA).
md_for_ketoacidosis  <- cd8_l1_full_filt@meta.data  %>% 
dplyr::filter(Condition %in% c("Dia T0"))  %>% 
group_by(Patient_ID, ph_man)  %>% 
# tally() is used only to collapse cells to one row per group; n is dropped below
tally %>% mutate(Ketoacidosis = ifelse(ph_man<7.3,"Keto_1","Keto_0"))  %>% 
dplyr::select(-ph_man, -n)

In [None]:
md_for_ketoacidosis

In [None]:
# Attach the per-patient ketoacidosis label to every dataset and combine it
# with the time point, giving Keto_Time values of the form "<Keto label> <Time>".
for (i in seq_along(cd8_l3_list)) {
    
    current_md  <- cd8_l3_list[[i]]@meta.data
    
    # Remove labels left from an earlier run. Otherwise an existing
    # Ketoacidosis column would become a join key (or clash with the new one)
    # and stale labels would be kept silently after md_for_ketoacidosis changes.
    current_md  <- current_md[, setdiff(colnames(current_md), c("Ketoacidosis", "Keto_Time")), drop = FALSE]
    
    joined_md  <- current_md  %>% 
        left_join(md_for_ketoacidosis, by = "Patient_ID")   %>% 
        mutate(Keto_Time = paste(Ketoacidosis, Time))  %>% 
        # paste() turns a missing label or time into the text "NA"; map those back to NA
        mutate(Keto_Time = ifelse(grepl(Keto_Time, pattern = "NA"), NA_character_,Keto_Time))
    
    # The join must keep exactly one row per cell
    stopifnot(nrow(joined_md) == ncol(cd8_l3_list[[i]]))
    
    # left_join() drops row names; restore the cell names before storing
    rownames(joined_md)  <- colnames(cd8_l3_list[[i]])
    cd8_l3_list[[i]]@meta.data  <- joined_md
    
    print(cd8_l3_list[[i]]$Ketoacidosis  %>% table)
}

In [None]:
cd8_l3_list[[i]]$Keto_Time  %>% table

### Ketoacidosis in T0

In [None]:
# Pairs of condition labels to compare. Each element is c(first, second) and is
# read by position in Create_Markers_DataFrame. When the first label contains
# "PR" the labels are matched against Condition2, when it contains "Keto"
# against Keto_Time, otherwise against Condition.
Conditions  <- list(c("Dia T0", "Ctrl T0"),
                    c("Dia T0", "Dia T1"),
                    c("Dia T1", "Ctrl T0"),
                    c("PR_0 T0", "PR_1 T0"),
                    c("PR_0 T1", "PR_1 T1"),
                    c("PR_0 T0", "PR_0 T1"),
                    c("PR_1 T0", "PR_1 T1"),
                    c("Keto_1 T0", "Keto_0 T0"))

In [None]:
Conditions  %>% length

In [None]:
dataset_list  <- cd8_l3_list

In [None]:
# Calculate markers between two conditions within one dataset.
#
# Arguments:
#   j              Position in the global `Conditions` list of the pair to test.
#   dataset_index  Position in the global `dataset_list` of the dataset to use.
#                  Defaults to the global `i`, which the calling loop sets, so
#                  existing calls with only `j` behave as before.
#
# The dataset is restricted to cells of the two conditions from experiments
# Exp16, Exp18, Exp19 and Exp20. FindAllMarkers(only.pos = TRUE) is then run
# with the condition as identity, once on the "RNA" assay and once on the
# "CollecTRI" assay.
#
# Returns a data.frame of the combined markers with added columns `source`
# ("scRNAseq_RNA" / "scRNAseq_collecTRI"), `test_type` ("<cond1> vs <cond2>")
# and `dataset` (name of the dataset), or NULL when neither assay yields any
# marker. bind_rows() skips NULL, so callers can combine results directly.
Create_Markers_DataFrame  <- function(j, dataset_index = i){
    print("#######################")
    print(dataset_index)
    print(j)
    Condition_1  <- Conditions[[j]][1]
    Condition_2  <- Conditions[[j]][2]
    
    seurat_obj  <- dataset_list[[dataset_index]]
        
    # Remission and ketoacidosis labels live in their own metadata columns;
    # copy the relevant one into Condition so the subset below is uniform.
    if(grepl(Condition_1, pattern = "PR")){
      seurat_obj$Condition  <- seurat_obj$Condition2
    }
    
    if(grepl(Condition_1, pattern = "Keto")){
      seurat_obj$Condition  <- seurat_obj$Keto_Time
    }
    
    ## Create subsetted df
    ds  <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) & 
                  Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20"))
    print(ds$Condition  %>% table)
    
    # Run FindAllMarkers on one assay and tag the result with its source.
    # The tag is added only to non-empty results: assigning a column to the
    # zero-row data.frame returned when nothing is found raises an error.
    find_markers_for_assay  <- function(assay_name, source_label){
        DefaultAssay(ds)  <- assay_name
        Idents(ds)  <- ds$Condition
        res  <- FindAllMarkers(ds, only.pos = TRUE)
        if(nrow(res) > 0){
            res$source  <- source_label
            rownames(res)  <- NULL
        }
        res
    }
    
    ## scRNAseq - RNA
    markers_sc  <- find_markers_for_assay("RNA", "scRNAseq_RNA")
    print(paste("DE RNA: ", nrow(markers_sc)))
    
    ## scRNAseq - collecTRI
    markers_sc2  <- find_markers_for_assay("CollecTRI", "scRNAseq_collecTRI")
    print(paste("DE CollecTRI: ", nrow(markers_sc2)))
    
    # Keep only the assays that produced markers
    non_empty  <- Filter(function(m) nrow(m) > 0, list(markers_sc, markers_sc2))
    print("..")
    if(length(non_empty) == 0){
        # Nothing found. Returning NULL avoids the list that `NULL$col <- value`
        # would create, which bind_rows() would turn into a spurious row.
        return(NULL)
    }
    
    markers2  <- do.call(rbind, non_empty)
    markers2$test_type  <- paste(Condition_1, "vs", Condition_2)
    markers2$dataset  <- names(dataset_list)[dataset_index]
    return(markers2)
}


In [None]:
# Run every comparison in `Conditions` on every dataset and stack the results
# into `all_markers`. The loop variable must stay the global `i`, because
# Create_Markers_DataFrame reads it to choose the dataset.
all_markers_list  <- vector("list", length(dataset_list))

for(i in seq_along(dataset_list)){

print("######################################################################")

# One result per comparison; the number of comparisons follows `Conditions`
mrk  <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)

mrk  <- bind_rows(mrk) 

# Collect per dataset and bind once after the loop instead of growing a
# data.frame with rbind() on every iteration
all_markers_list[[i]]  <- mrk
    
}

all_markers  <- bind_rows(all_markers_list)

## DE genes with correction for sex-related genes

In [None]:
# Calculate sex-associated markers within one dataset.
#
# Arguments:
#   i  Position in the global `dataset_list` of the dataset to use.
#
# FindAllMarkers(only.pos = TRUE) is run with the `Sex` metadata column as
# identity, once on the "RNA" assay and once on the "CollecTRI" assay.
#
# Returns a data.frame of the combined markers with added columns `source`
# ("Sex_RNA" / "Sex_collecTRI"), `test_type` ("Sex") and `dataset` (name of the
# dataset), or NULL when neither assay yields any marker. bind_rows() skips
# NULL, so callers can combine results directly.
Create_Sex_Markers_DataFrame  <- function(i){
    
    ds  <- dataset_list[[i]]
   
    # Run FindAllMarkers on one assay and tag the result with its source.
    # The tag is added only to non-empty results: assigning a column to the
    # zero-row data.frame returned when nothing is found raises an error.
    find_markers_for_assay  <- function(assay_name, source_label){
        DefaultAssay(ds)  <- assay_name
        Idents(ds)  <- ds$Sex
        res  <- FindAllMarkers(ds, only.pos = TRUE)
        if(nrow(res) > 0){
            res$source  <- source_label
            rownames(res)  <- NULL
        }
        res
    }
    
    ## scRNAseq - RNA
    markers_sc  <- find_markers_for_assay("RNA", "Sex_RNA")
    print(paste("DE RNA: ", nrow(markers_sc)))
    
    ## scRNAseq - collecTRI
    markers_sc2  <- find_markers_for_assay("CollecTRI", "Sex_collecTRI")
    print(paste("DE CollecTRI: ", nrow(markers_sc2)))
    
    # Keep only the assays that produced markers
    non_empty  <- Filter(function(m) nrow(m) > 0, list(markers_sc, markers_sc2))
    print("..")
    if(length(non_empty) == 0){
        # Nothing found. Returning NULL avoids the list that `NULL$col <- value`
        # would create, which bind_rows() would turn into a spurious row.
        return(NULL)
    }
    
    markers2  <- do.call(rbind, non_empty)
    markers2$test_type  <- "Sex"
    markers2$dataset  <- names(dataset_list)[i]
    return(markers2)
}


In [None]:

    
# Sex-associated markers for every dataset, stacked into one table.
# seq_along() yields no iterations for an empty list, whereas 1:length() would
# yield c(1, 0).
mrk  <- map(.x = seq_along(dataset_list), Create_Sex_Markers_DataFrame)

mrk  <- bind_rows(mrk) 


In [None]:
mrk

In [None]:
# Create the output folder, including missing parent folders, and stay quiet
# when it already exists so the cell can be rerun.
dir.create("../tables/de_genes", showWarnings = FALSE, recursive = TRUE)

write.csv(all_markers, "../tables/de_genes/240319_markers_full_cd8_with_collecTRI.csv", row.names = F)

In [None]:
write.csv(mrk, "../tables/de_genes/240319_markers_full_cd8_sex.csv", row.names = F)

In [None]:
# Create filtered marker list without Sex genes
#
# For each dataset, drop every marker whose gene is also a sex-associated
# marker in that same dataset. The per-dataset tables are built in one pass and
# bound once, instead of growing a data.frame with rbind() inside a loop.
all_markers_without_sex  <- bind_rows(lapply(names(dataset_list), function(dataset_name){

    filt_df  <- all_markers  %>% dplyr::filter(dataset == dataset_name)
    genes_to_remove  <- mrk  %>% dplyr::filter(dataset == dataset_name)  %>% pull(gene) 

    dplyr::filter(filt_df, !(gene %in% genes_to_remove))
}))

In [None]:
nrow(all_markers)

In [None]:
nrow(all_markers_without_sex)

In [None]:
write.csv(all_markers_without_sex, "../tables/de_genes/240319_cd8_all_markers_without_sex.csv", row.names = F)

In [None]:
all_markers_without_sex$dataset %>% table

# DE genes heatmap avg.

In [None]:
# Reload the full CD8 object from disk.
# NOTE(review): at the top of this file the same object is read from
# "../../240218_VN_Diabetes_V05/data/processed/L1/cd8_l1_full_filt.rds";
# confirm that this path points to the intended version.
cd8_l1_full_filt  <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# Average RNA expression for each combination of Patient_Time, Disease and
# age_group, using only cells from experiments Exp16, Exp18, Exp19 and Exp20.
# The argument is spelled `assays` in full rather than relying on partial
# matching of `assay`.
avgexp  <- AverageExpression(subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16","Exp18","Exp19","Exp20")), 
                             assays = "RNA",
                             return.seurat = FALSE,
                             group.by = c("Patient_Time","Disease","age_group"))

In [None]:
avgexp$RNA[which(rownames(avgexp$RNA)=="GNLY"),]  %>% as.data.frame()

## Heatmap

### All cells

In [None]:
# Per-cell label "<Patient_ID> <Time> <Disease>", separated by single spaces
cd8_l1_full_filt$Patient_Time_Disease  <- paste(cd8_l1_full_filt$Patient_ID, cd8_l1_full_filt$Time, 
                                               cd8_l1_full_filt$Disease)

In [None]:
cd8_l1_full_filt$Patient_Time_Disease   %>% table

In [None]:
# Per-cell label "<Disease> <Time>", used below as the grouping variable for
# the averaged heatmap
cd8_l1_full_filt$Disease_time  <- paste(
                                               cd8_l1_full_filt$Disease,
cd8_l1_full_filt$Time)

In [None]:
# Average RNA expression per Disease_time group, using only cells from
# experiments Exp16, Exp18, Exp19 and Exp20. The argument is spelled `assays`
# in full rather than relying on partial matching of `assay`.
avgexp  <- AverageExpression(subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16","Exp18","Exp19","Exp20")), 
                             assays = "RNA",
                             return.seurat = FALSE,
                             group.by = "Disease_time")

In [None]:
# Genes shown in the averaged heatmap. The list is applied with %in% against
# the row names of the expression matrix, so a misspelled symbol is dropped
# without any error ("NKFBIA" corrected to "NFKBIA").
genes  <- c("PCBP2","PCBP1",
            "CX3CR1","TNF","GZMB","GZMA","PRF1","NKG7","GNLY","CCL5","CST7",
            "BTG1","SELL","IL7R","CCR7","BTG2","SLAMF6","LEF1",
             "TNFAIP3","TSC22D3","NFKBIA","DUSP1")

In [None]:
avgexp  <- avgexp$RNA[which(rownames(avgexp$RNA) %in% genes),]

In [None]:
library(pheatmap)

options(repr.plot.width = 4, repr.plot.height = 9)
# Row-scaled heatmap of the selected genes; rows are clustered, columns keep
# their order.
# NOTE(review): no filename is given; pheatmap documents width/height as the
# size of the output file, so they presumably have no effect here - confirm.
pheatmap(avgexp, main = "", scale = "row", cluster_cols = F, cluster_rows = T,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 9)

In [None]:
# Create the output folder, including missing parent folders, and stay quiet
# when it already exists so the cell can be rerun.
dir.create("../tables/avg_heatmap/", showWarnings = FALSE, recursive = TRUE)
write.csv(avgexp, "../tables/avg_heatmap/cd8_avg.csv")

### Average of patient

In [None]:
# Per-cell label "<Patient_ID> <Time> <Disease>", separated by single spaces.
# It is split on those spaces after averaging.
cd8_l1_full_filt$Patient_Time_Disease  <- paste(cd8_l1_full_filt$Patient_ID, cd8_l1_full_filt$Time, 
                                               cd8_l1_full_filt$Disease)

In [None]:
# Average RNA expression per Patient_Time_Disease group, returned as a Seurat
# object, using only cells from experiments Exp16, Exp18, Exp19 and Exp20.
# The argument is spelled `assays` in full rather than relying on partial
# matching of `assay`.
avgexp  <- AverageExpression(subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16","Exp18","Exp19","Exp20")), 
                             assays = "RNA",
                             return.seurat = TRUE,
                             group.by = "Patient_Time_Disease")

In [None]:
# Recover patient, time and disease for each averaged column by splitting its
# column name on single spaces.
# NOTE(review): assumes the column names of the averaged object are the
# original "<Patient_ID> <Time> <Disease>" labels unchanged and that none of
# the three parts contains a space - confirm.
avgexp$Patient_Time_Disease  <- colnames(avgexp)
avgexp@meta.data  <- avgexp@meta.data  %>% 
separate(Patient_Time_Disease, into = c("Patient", "Time", "Disease"), sep = " ", remove = F)

In [None]:
avgexp$Disease_Time  <- paste(avgexp$Disease, avgexp$Time)

In [None]:
avgexp2 = AverageExpression(avgexp, 
                             return.seurat = F, group.by = "Disease_Time")

In [None]:
genes4  <- c("LEF1","BACH2","NELL2","TCF7","CXCR4","ZFP36L2","IL7R","KLF2","CCR7","SELL",
            
             
            rev(c("TNFRSF9","PRF1","NKG7","GZMB","CST7","GNLY","GZMA","CX3CR1","CCL5","TNF","TBX21","IFNG")),
             "NFATC2","NFATC3","ORAI1", "ITPR3",
             "TNFAIP3","DUSP1","TSC22D3","NFKBIA","DDIT4","INPP4B","PTPN6","RCAN3",
             rev(c("OASL","IFI44L","ISG15","MX1","STAT1","IFI6","IFIT3")),
                  "BTN3A2","BTN3A3","HLA-C","HLA-DQA1","HLA-DQB1")

In [None]:
as.data.frame(genes4)

In [None]:
avgexp3  <- avgexp2$RNA[which(rownames(avgexp2$RNA) %in% genes4),]

In [None]:
options(repr.plot.width = 4, repr.plot.height = 9)

# Row-scaled heatmap of the genes4 panel in the order given in genes4.
# Only genes present in the matrix are used: indexing with match() would give
# an NA row index for any gene that is missing from avgexp3.
genes_present  <- genes4[genes4 %in% rownames(avgexp3)]
if(length(genes_present) < length(genes4)){
    warning("Genes not found in avgexp3 and omitted from the heatmap: ",
            paste(setdiff(genes4, genes_present), collapse = ", "))
}

pheatmap(avgexp3[genes_present, , drop = FALSE], 
         main = "", scale = "row", cluster_cols = F, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 9)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 9)

# Same heatmap as in the notebook, written to a PDF file.
# Only genes present in the matrix are used: indexing with match() would give
# an NA row index for any gene that is missing from avgexp3.
genes_present  <- genes4[genes4 %in% rownames(avgexp3)]
if(length(genes_present) < length(genes4)){
    warning("Genes not found in avgexp3 and omitted from the heatmap: ",
            paste(setdiff(genes4, genes_present), collapse = ", "))
}

# Make sure the output folder exists before the file is written
dir.create("../figures/heatmaps", showWarnings = FALSE, recursive = TRUE)

pheatmap(avgexp3[genes_present, , drop = FALSE], 
         main = "", scale = "row", cluster_cols = F, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 3.3, height = 9,
                  fontsize = 9, filename = "../figures/heatmaps/fig2_cd8.pdf")