In [None]:
source("diabetes_analysis_v07.R")

In [None]:
# Add a composite ranking score to a marker table.
#
# score = -log(p_val_adj) * avg_log2FC * (pct.1 / pct.2). The tiny constants
# added to p_val_adj and pct.2 keep the logarithm and the ratio finite when
# either value is exactly zero.
#
# df:      data frame with columns p_val_adj, avg_log2FC, pct.1 and pct.2
#          (presumably Seurat marker output; verify against callers).
# returns: df with an added (or overwritten) `score` column.
rank_score_func <- function(df) {
  mutate(
    df,
    score = -1 * log(p_val_adj + (10^-310)) * avg_log2FC *
      (pct.1 / (pct.2 + 10^-300))
  )
}

# Analysis of subsets

In [None]:
# Load the L3 CD8 subset objects from their RDS files.
cd8_l3_tem  <- readRDS("../data/processed/L3/cd8_l3_tem.rds")
cd8_l3_naive  <- readRDS("../data/processed/L3/cd8_l3_naive.rds")
cd8_l3_tcm  <- readRDS("../data/processed/L3/cd8_l3_tcm.rds")
cd8_l3_temra  <- readRDS("../data/processed/L3/cd8_l3_temra.rds")

In [None]:
cd8_l3_prolif  <- readRDS("../data/processed/L3/cd8_l3_prolif.rds")

In [None]:
# Load the L2 objects.
cd8_l2_subcluster  <- readRDS("../data/processed/L2/cd8_l2_subcluster.rds")

In [None]:
cd8_l2_nk  <- readRDS("../data/processed/L2/cd8_l2_nk.rds")
cd8_l2_unc  <- readRDS("../data/processed/L2/cd8_l2_unc.rds")

In [None]:
# NOTE(review): unlike the files above, the L1 object is read from another
# project directory (240218_VN_Diabetes_V05); a later cell reloads it from
# ../data/processed/L1/ - confirm which copy is the intended one.
cd8_l1_full_filt  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# Inspect the metadata table of the L1 object.
cd8_l1_full_filt@meta.data

In [None]:
# Embedding plot coloured by the L2 annotation column.
DimPlot(cd8_l1_full_filt, group.by = "annotations_l2")

In [None]:
# Collect every loaded CD8 object in one list. Each entry is named after its
# variable; the names are reused later as output file stems.
cd8_l3_list <- list(
    cd8_l3_tem        = cd8_l3_tem,
    cd8_l3_naive      = cd8_l3_naive,
    cd8_l3_tcm        = cd8_l3_tcm,
    cd8_l3_temra      = cd8_l3_temra,
    cd8_l3_prolif     = cd8_l3_prolif,
    cd8_l2_nk         = cd8_l2_nk,
    cd8_l2_unc        = cd8_l2_unc,
    cd8_l2_subcluster = cd8_l2_subcluster,
    cd8_l1_full_filt  = cd8_l1_full_filt
)

In [None]:
# Print a summary of the unconventional-subset object.
cd8_l2_unc

In [None]:
# Look at the "Antibodies" element of the first list entry (it is removed
# again by the export loop below).
cd8_l3_list[[1]][["Antibodies"]]

In [None]:
# NOTE(review): within this file `x` is only assigned inside the export loop
# further down, so this cell fails on a fresh top-to-bottom run - confirm
# whether it is still needed.
 x[["PTPRC"]]
    

In [None]:
# Print a summary of the proliferating-subset object.
cd8_l3_prolif

In [None]:
# Save a slimmed-down copy of every object in cd8_l3_list: RNA becomes the
# default assay, the auxiliary entries listed below are removed, and the cells
# are randomly down-sampled (at most 2000 for "_l3" objects, 7000 otherwise).
# NOTE(review): sample() is not seeded here, so the retained cells differ
# between runs; call set.seed() beforehand if the output must be reproducible.

# saveRDS() does not create missing directories.
dir.create("../data/processed/diet/", showWarnings = FALSE, recursive = TRUE)

for (i in seq_along(cd8_l3_list)) {
    x <- cd8_l3_list[[i]]
    DefaultAssay(x) <- "RNA"
    x[["Antibodies"]] <- NULL
    x[["refAssay"]] <- NULL
    x[["prediction.score.celltype.l1"]] <- NULL
    x[["prediction.score.celltype.l2"]] <- NULL
    x[["prediction.score.celltype.l3"]] <- NULL
    x[["pathwayswmean"]] <- NULL
    x[["CollecTRI"]] <- NULL

    # Store the cell names as a metadata column so subset() can filter on them.
    x$barcode <- colnames(x)

    target_size <- if (grepl(names(cd8_l3_list)[i], pattern = "_l3")) 2000 else 7000
    # sample() without replacement errors when asked for more elements than
    # exist, so small objects are kept in full instead of aborting the loop.
    n_keep <- min(length(colnames(x)), target_size)
    x <- subset(x, barcode %in% sample(colnames(x), size = n_keep))

    saveRDS(x, paste0("../data/processed/diet/", names(cd8_l3_list)[i], ".rds"))
}

# Plot of Unconventional populations

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

# Embedding coloured by annotations_l2: the first group in dodgerblue3, the
# next five in grey and the last two in the 7th/8th colours of an 8-colour
# hue palette.
# NOTE(review): the colours are unnamed and therefore matched to the groups by
# position - re-check them if the number or order of annotations changes.
unc_cols <- c("dodgerblue3", rep("grey88", 5), scales::hue_pal()(8)[c(7, 8)])
unc_umap <- DimPlot(cd8_l1_full_filt, raster = TRUE, pt.size = 2,
                    group.by = "annotations_l2", cols = unc_cols) +
    NoLegend()
unc_umap

# Make sure the output folder exists, and pass the plot explicitly so the
# saved figure does not depend on whichever plot happened to be drawn last.
dir.create("../figures/Dimplots_unconventional/", showWarnings = FALSE, recursive = TRUE)
ggsave(filename = "../figures/Dimplots_unconventional/unconventional_cd8.png",
       plot = unc_umap, width = 12, height = 12, units = "cm")
ggsave(filename = "../figures/Dimplots_unconventional/unconventional_cd8.svg",
       plot = unc_umap, width = 12, height = 12, units = "cm")

In [None]:
# Number of cells per L2 annotation.
cd8_l1_full_filt$annotations_l2 %>% table()

In [None]:
# Use the L2 annotation as the active identity, then compute markers for the
# gd T, MAIT and NK groups (no ident.2 is given, so FindMarkers falls back to
# its default comparison group).
Idents(cd8_l1_full_filt) <- cd8_l1_full_filt$annotations_l2
mrk1 <- FindMarkers(cd8_l1_full_filt,
                    ident.1 = "CD8 Unconventional T cells---gd T cells")
mrk2 <- FindMarkers(cd8_l1_full_filt,
                    ident.1 = "CD8 Unconventional T cells---MAIT cells")
mrk3 <- FindMarkers(cd8_l1_full_filt,
                    ident.1 = "CD8 NK cells---NK cells")


In [None]:
# Show the NK marker table.
mrk3

In [None]:
# Marker genes for the unconventional-population heatmap. The vector is
# written in reading order and reversed before use.
markers <- rev(c(
    "CD3D", "TRAC", "TRGC1", "TRDC", "NKG7", "CXCR6",
    "LTB", "FCER1G", "IL7R",
    "PTGDS", "CCL4", "GNLY", "KLRB1",
    "CD8A", "CD8B", "NCR1", "ZBTB16",
    "GZMK", "GZMB",
    "KLRG1",
    "TNF", "CX3CR1"
))

In [None]:
# Average RNA expression of the markers per L2 annotation.
# The argument is spelled `assays` (as in the other AverageExpression calls of
# this notebook); `assay =` only worked through R's partial argument matching.
avgexp <- AverageExpression(cd8_l1_full_filt, features = markers,
                            return.seurat = FALSE, group.by = "annotations_l2",
                            assays = "RNA")

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 3.5)
# Heatmap of the averaged marker expression: rows are the annotation groups in
# a hand-picked order (row indices below), columns are genes, values are
# scaled per column and the columns are clustered.
pheatmap(t(avgexp$RNA)[c(1, 7, 8, 6, 5, 4, 2, 3), ],
         main = "",
         scale = "column", cluster_cols = TRUE, cluster_rows = FALSE,
         color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
         border_color = "white",
         fontsize = 12)

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 3.5)
# Same heatmap written to PDF. The folder is created first because this cell
# runs before the notebook's own dir.create() call for ../figures/heatmaps/.
dir.create("../figures/heatmaps/", showWarnings = FALSE, recursive = TRUE)
pheatmap(t(avgexp$RNA)[c(1, 7, 8, 6, 5, 4, 2, 3), ],
         main = "",
         scale = "column", cluster_cols = TRUE, cluster_rows = FALSE,
         color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
         border_color = "white",
         filename = "../figures/heatmaps/heatmap_cd8_unconventional.pdf",
         width = 8, height = 3,
         fontsize = 12)
         

In [None]:
# Switch the L1 object to the CollecTRI assay.
DefaultAssay(cd8_l1_full_filt) <- "CollecTRI"

In [None]:
# Group cells by the manual annotation for the marker test below.
Idents(cd8_l1_full_filt) <- cd8_l1_full_filt$annotations_manual

In [None]:
# Positive markers only, for every identity class.
mrk <- FindAllMarkers(cd8_l1_full_filt, only.pos = TRUE)

In [None]:
# Markers reported for the "CD8 T cells" cluster.
filter(mrk, cluster == "CD8 T cells")

In [None]:
# Transcription factors shown in the CollecTRI heatmap. "NFIL3" and "FOXA2"
# are listed twice below; unique() keeps the first occurrence of each after
# the reversal so that every feature is requested exactly once.
markers <- unique(rev(c(
    "SPIC", "ZFPM1", "CEBPZ", "EOMES", "SPI1", "ZNF395",
    "RORC", "MAFB", "CREB3",
    "STAT5A", "NFIL3", "FOXA2",
    "CREB1", "NFKB1", "NFIL3",
    "FOXA2", "NR3C1", "BRD4", "ZBTB17",
    "ETS1", "HIF1A", "NFKB2", "ZBTB16"
)))

In [None]:
# Average CollecTRI values of the selected features per L2 annotation.
# `assays` is spelled out; `assay =` only worked via partial argument matching.
avgexp <- AverageExpression(cd8_l1_full_filt, features = markers,
                            return.seurat = FALSE, group.by = "annotations_l2",
                            assays = "CollecTRI")

In [None]:
options(repr.plot.width = 7, repr.plot.height = 3.5)
# Heatmap of the averaged CollecTRI values: rows are the annotation groups in
# a hand-picked order, columns are features, scaled per column.
pheatmap(t(avgexp$CollecTRI)[c(1, 7, 8, 6, 5, 4, 2, 3), ],
         main = "",
         scale = "column", cluster_cols = TRUE, cluster_rows = FALSE,
         color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
         border_color = "white",
         fontsize = 12)

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 3.5)
# Same heatmap written to PDF; make sure the target folder exists first.
dir.create("../figures/heatmaps/", showWarnings = FALSE, recursive = TRUE)
pheatmap(t(avgexp$CollecTRI)[c(1, 7, 8, 6, 5, 4, 2, 3), ],
         main = "",
         scale = "column", cluster_cols = TRUE, cluster_rows = FALSE,
         color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
         border_color = "white",
         filename = "../figures/heatmaps/heatmap_cd8_unconventional_collectri.pdf",
         width = 8, height = 3,
         fontsize = 12)

## Heatmaps of marker genes

In [None]:
options(repr.plot.width = 7, repr.plot.height = 6)
# Embedding of the L2 subcluster object coloured by the manual annotation.
DimPlot(cd8_l2_subcluster, group.by = "annotations_manual")

In [None]:
# Marker genes for the L2 heatmap, written in reading order and reversed.
markers <- rev(c(
    "CCR7", "SELL", "TCF7", "LEF1", "BACH2",
    "XCL1", "KLRC2", "IL2RB", "ZNF683", "ITGAM",
    "CXCR3", "ITGA4", "GZMK", "CD28",
    "CCL5", "EOMES", "ITGB1", "KLRB1", "KLRG1",
    "CCL4", "ZEB2", "PRF1", "TBX21", "TOX", "IFNG",
    "GZMA", "TNF", "GZMB", "CX3CR1", "HLA-DRA",
    "MKI67", "PCNA", "MCM6"
))

In [None]:
# Average RNA expression of the markers per manual annotation.
# `assays` is spelled out; `assay =` only worked via partial argument matching.
avgexp <- AverageExpression(cd8_l2_subcluster, features = markers,
                            return.seurat = FALSE, group.by = "annotations_manual",
                            assays = "RNA")

In [None]:
avgexp$RNA

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 2.5)
# Heatmap of the averaged marker expression; the annotation columns of
# avgexp$RNA are reordered by position before transposing, values are scaled
# per gene and nothing is clustered.
pheatmap(t(avgexp$RNA[, c(2, 1, 4, 3, 5)]),
         main = "",
         scale = "column", cluster_cols = FALSE, cluster_rows = FALSE,
         color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
         border_color = "white",
         fontsize = 12)

In [None]:
# Create the heatmap folder. showWarnings = FALSE keeps re-runs quiet when it
# already exists; recursive = TRUE also creates ../figures/ if it is missing.
dir.create("../figures/heatmaps/", showWarnings = FALSE, recursive = TRUE)

In [None]:
# Same heatmap written to PDF with a smaller font.
pheatmap(t(avgexp$RNA[, c(2, 1, 4, 3, 5)]),
         main = "",
         scale = "column", cluster_cols = FALSE, cluster_rows = FALSE,
         color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
         border_color = "white",
         filename = "../figures/heatmaps/heatmap_cd8_l2.pdf",
         width = 8, height = 3,
         fontsize = 9)

## Genes in top populations

In [None]:
# Average the "integrated" assay per L3 annotation and keep the result as a
# Seurat object.
full_bulk <- AverageExpression(
    cd8_l1_full_filt,
    assays = "integrated",
    return.seurat = TRUE,
    group.by = "annotations_l3"
)

In [None]:
# Store each column name of the averaged object as its L3 annotation.
full_bulk$annotations_l3 <- colnames(full_bulk)

In [None]:
# Average RNA expression per L3 annotation for a curated gene panel; the
# symbols are written in mixed case and upper-cased before the lookup.
avgexp <- AverageExpression(
    cd8_l1_full_filt,
    features = str_to_upper(c(
        "Ccr7", "Lef1", "Sell", "Tcf7", "Il7r",
        "Isg15", "Irf7", "Oas3", "Ifit3", "Mki67", "Pcna", "Mcm6",
        "Klrg1", "Cxcr3", "Cxcr6", "Gzma", "Gzmk", "Cx3cr1", "Il2rb", "Ifng",
        "Trgc1", "Tbx21", "Eomes", "Prdm1", "Il2",
        "Tnf",
        "Trdc", "Cd3d", "Cd8a", "Ncr1", "Klrb1"
    )),
    return.seurat = FALSE,
    group.by = "annotations_l3",
    assays = "RNA"
)

In [None]:
library(pheatmap)

In [None]:
options(repr.plot.width = 9, repr.plot.height = 8)
# Heatmap of the L3 averages: rows are annotations, columns are genes
# (scaled per gene, columns clustered).
pheatmap(
    t(avgexp$RNA),
    main = "",
    scale = "column",
    cluster_cols = TRUE,
    cluster_rows = FALSE,
    color = colorRampPalette(c("dodgerblue3", "grey95", "indianred2"))(50),
    border_color = "white",
    fontsize = 9
)

In [None]:
options(repr.plot.width = 9, repr.plot.height = 10)
# Same heatmap written to PDF.
# NOTE(review): this file name has no directory, so the PDF lands in the
# current working directory rather than ../figures/heatmaps/ like the other
# heatmaps - confirm that this is intended.
pheatmap(
    t(avgexp$RNA),
    main = "",
    scale = "column",
    cluster_cols = TRUE,
    cluster_rows = FALSE,
    color = colorRampPalette(c("dodgerblue3", "grey95", "indianred2"))(50),
    border_color = "white",
    filename = "heatmap_cd8_l3.pdf",
    width = 9,
    height = 8,
    fontsize = 9
)

## Heatmap Dorothea

In [None]:
# Reload the objects from disk; this discards the in-memory changes made to
# them in the cells above (e.g. default assay and identities).
# NOTE(review): several files are read more than once in the cells below -
# the repeated readRDS() calls look redundant; confirm before removing.
cd8_l2_subcluster  <- readRDS("../data/processed/L2/cd8_l2_subcluster.rds")

In [None]:
cd8_l2_nk  <- readRDS("../data/processed/L2/cd8_l2_nk.rds")
cd8_l2_unc  <- readRDS("../data/processed/L2/cd8_l2_unc.rds")

In [None]:
cd8_l3_tem  <- readRDS("../data/processed/L3/cd8_l3_tem.rds")
cd8_l3_naive  <- readRDS("../data/processed/L3/cd8_l3_naive.rds")
cd8_l3_tcm  <- readRDS("../data/processed/L3/cd8_l3_tcm.rds")
cd8_l3_temra  <- readRDS("../data/processed/L3/cd8_l3_temra.rds")
cd8_l3_prolif  <- readRDS("../data/processed/L3/cd8_l3_prolif.rds")

In [None]:
cd8_l2_subcluster  <- readRDS("../data/processed/L2/cd8_l2_subcluster.rds")
cd8_l2_nk  <- readRDS("../data/processed/L2/cd8_l2_nk.rds")
cd8_l2_unc  <- readRDS("../data/processed/L2/cd8_l2_unc.rds")
# NOTE(review): the L1 object was first loaded from
# ../../240218_VN_Diabetes_V05/data/processed/L1/; here it comes from this
# project's data folder - confirm both paths hold the same object.
cd8_l1_full_filt  <- readRDS("../data/processed/L1/cd8_l1_full_filt.rds")

In [None]:
# Work on the CollecTRI assay of the L2 subcluster object.
DefaultAssay(cd8_l2_subcluster) <- "CollecTRI"

In [None]:
# Group cells by the manual annotation.
Idents(cd8_l2_subcluster) <- cd8_l2_subcluster$annotations_manual

In [None]:
# Positive CollecTRI markers for every annotation.
coll <- FindAllMarkers(cd8_l2_subcluster, only.pos = TRUE)

In [None]:
# First ten rows per cluster, in the order FindAllMarkers returned them.
genes <- coll %>%
    group_by(cluster) %>%
    slice_head(n = 10) %>%
    pull(gene)

In [None]:
# The target folder is nested: recursive = TRUE creates ../tables/cd8/ as
# well, and showWarnings = FALSE keeps re-runs quiet.
dir.create("../tables/cd8/collectri_annotations/", showWarnings = FALSE, recursive = TRUE)

In [None]:
write.csv(coll, file = "../tables/cd8/collectri_annotations/cd8_l2_subcluster.csv")

In [None]:
# Average CollecTRI values of the selected features per manual annotation.
# `assays` is spelled out; `assay =` only worked via partial argument matching.
avgexp <- AverageExpression(cd8_l2_subcluster, features = genes,
                            return.seurat = FALSE, group.by = "annotations_manual",
                            assays = "CollecTRI")

avgexp$CollecTRI

In [None]:
# First, longer transcription-factor panel.
# NOTE(review): `genes2` and `avgexp` are both overwritten by the next cell
# before being used in this file, so this cell looks superseded - confirm
# before removing.
genes2 <- c(
    "ZFP42", "KLF3", "STAT6", "ATF2", "NR4A2", "RBPJ", "JUND", "ATF3",
    "JUN", "NFAT5", "SMAD3", "ID3", "RELA", "STAT1", "NOTCH1", "FOS",
    "NFKB", "ZBTB16", "TCF7", "RORB", "CTCFL", "TCF4", "KLF2", "BACH2",
    "NR4A3", "CEBPZ", "ZNF395", "IRF5", "EOMES", "RUNX3", "STAT4", "RORC",
    "CEBPD", "IRF8", "TBX21", "RUNX1", "PRDM1", "ID2", "IRF6", "NFKB1",
    "RELB", "KLF13", "NFKB2", "NFKBIB", "CEBPA", "E2F4", "MYC", "TP53",
    "IRF2", "IRF7", "LEF1", "CTCF", "ZEB1"
)
# `assays` is spelled out; `assay =` only worked via partial argument matching.
avgexp <- AverageExpression(cd8_l2_subcluster, features = genes2,
                            return.seurat = FALSE, group.by = "annotations_manual",
                            assays = "CollecTRI")

In [None]:
# Curated panel used for the CollecTRI heatmap, written in reading order and
# reversed before use.
genes2 <- rev(c(
    "TCF7", "LEF1", "CTCFL", "KLF2", "NR4A3", "BACH2",
    "CTCF", "NOTCH1", "KLF3", "RBPJ", "SMAD3", "NFAT5", "ID3",
    "STAT6", "FOS", "JUN", "RELB", "IRF6", "CEBPA", "CEBPD",
    "TBX21", "PRDM1", "ZNF395", "ID2", "EOMES", "STAT4",
    "RUNX3", "IRF2", "NFKB", "TP53", "MYC", "E2F4", "ZEB1"
))
avgexp <- AverageExpression(cd8_l2_subcluster, features = genes2,
                            return.seurat = FALSE, group.by = "annotations_manual",
                            assays = "CollecTRI")

In [None]:
length(genes2)

In [None]:
options(repr.plot.width = 9.5, repr.plot.height = 2.5)
# Heatmap of the averaged CollecTRI values; the annotation columns are
# reordered by position before transposing, values are scaled per feature and
# nothing is clustered.
pheatmap(
    t(avgexp$CollecTRI[, c(2, 1, 4, 3, 5)]),
    main = "",
    scale = "column",
    cluster_cols = FALSE,
    cluster_rows = FALSE,
    color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
    border_color = "white",
    fontsize = 12
)

In [None]:
# Same heatmap written to PDF with a smaller font.
pheatmap(
    t(avgexp$CollecTRI[, c(2, 1, 4, 3, 5)]),
    main = "",
    scale = "column",
    cluster_cols = FALSE,
    cluster_rows = FALSE,
    color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
    border_color = "white",
    filename = "../figures/heatmaps/heatmap_cd8_l2_collecTRI.pdf",
    width = 8,
    height = 2,
    fontsize = 9
)

# Frequencies L2

In [None]:
# Frequency table for the L2 subcluster object.
# NOTE(review): create_df4() is not defined in this file (presumably it comes
# from the sourced script); the columns used below are Experiment_ID,
# Condition, freq and annotations_manual.
df4 <- create_df4(cd8_l2_subcluster)

In [None]:
df4

In [None]:
# Frequency (%) of each annotated subset per Condition, one facet per subset,
# restricted to the four listed experiments. Violin + median crossbar +
# beeswarm points, with a p-value for the comparison of the 1st and 2nd
# Condition levels.
p5 <- df4 %>%
    filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
    ggplot(aes(x = Condition, y = freq * 100)) +
    geom_violin(aes(fill = Condition), alpha = 0.3) +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.75,
                 color = "grey30") +
    scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
    geom_beeswarm(aes(fill = Condition),
                  size = 3, cex = 3,
                  color = "black", method = "center",
                  shape = 21) +
    facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
    ylab("Frequency") +
    xlab("Condition") +
    theme_classic() +
    ggtheme() +
    ggpubr::stat_compare_means(label.x = 1.5,
                               size = 7, vjust = -1, label = "p.format",
                               comparisons = list(c(1, 2))) +
    ggtheme() +
    # Axis starts at 0; the additive expansion at the top presumably leaves
    # room for the p-value label.
    scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
    theme(strip.background = element_blank(), panel.grid = element_blank()) +
    theme(axis.text = element_text(color = "black"),
          axis.text.x = element_text(angle = 90),
          axis.line = element_line(color = "black", size = 0.5),
          axis.ticks.x = element_blank()) +
    NoLegend() +
    ggtitle("Final data all groups")
# Number of annotation levels (not used by the plot above, which fixes ncol).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

In [None]:
# Create the output folder if needed and save p5 explicitly, so the file does
# not depend on which plot was drawn last in the session.
dir.create("../figures/subset_characterization/", showWarnings = FALSE, recursive = TRUE)
ggsave("../figures/subset_characterization/cd8_subsets_in_condition.svg",
       plot = p5, width = 25, height = 12, units = "cm")

In [None]:
# Same plot as above without patient "116", testing the 2nd vs the 3rd
# Condition level with a paired test.
# A paired test matches observations by their row order within each group, so
# the rows are sorted by Patient_ID first to line up the samples of the same
# patient.
# NOTE(review): this assumes Patient_ID identifies the same individual in both
# compared conditions and that every remaining patient has a row in both -
# verify against create_df4().
p5 <- df4 %>%
    filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
    dplyr::filter(Patient_ID != "116") %>%
    dplyr::arrange(Patient_ID) %>%
    ggplot(aes(x = Condition, y = freq * 100)) +
    geom_violin(aes(fill = Condition), alpha = 0.3) +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.75,
                 color = "grey30") +
    scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
    geom_beeswarm(aes(fill = Condition),
                  size = 3, cex = 3,
                  color = "black", method = "center",
                  shape = 21) +
    facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
    ylab("Frequency") +
    xlab("Condition") +
    theme_classic() +
    ggtheme() +
    ggpubr::stat_compare_means(label.x = 1.5,
                               size = 7, vjust = -1, label = "p.format",
                               comparisons = list(c(2, 3)), paired = TRUE) +
    ggtheme() +
    scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
    theme(strip.background = element_blank(), panel.grid = element_blank()) +
    theme(axis.text = element_text(color = "black"),
          axis.text.x = element_text(angle = 90),
          axis.line = element_line(color = "black", size = 0.5),
          axis.ticks.x = element_blank()) +
    NoLegend() +
    ggtitle("Final data all groups")
# Number of annotation levels (not used by the plot above, which fixes ncol).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

### Unconventional and L2

In [None]:
# Cell counts per manual annotation.
cd8_l1_full_filt$annotations_manual %>% table()

In [None]:
# Replace the generic "Unconventional T cells" label with the more specific
# L2 annotation and keep every other manual label.
# Both columns are converted to character first: ifelse() drops factor
# attributes, so factor inputs would otherwise come back as integer codes.
cd8_l1_full_filt$annotations_manual <- ifelse(
    cd8_l1_full_filt$annotations_manual == "Unconventional T cells",
    as.character(cd8_l1_full_filt$annotations_l2),
    as.character(cd8_l1_full_filt$annotations_manual)
)

In [None]:
DimPlot(cd8_l1_full_filt, group.by = "annotations_manual")

In [None]:
# Features of the RNA counts matrix whose name contains "TRG".
grep("TRG", rownames(cd8_l1_full_filt@assays$RNA@counts), value = TRUE)

In [None]:
# Create the output folder once. showWarnings = FALSE keeps re-runs quiet
# when it already exists; recursive = TRUE also creates ../figures/ if needed.
dir.create("../figures/Feature_unconventional/", showWarnings = FALSE, recursive = TRUE)
FeaturePlot(cd8_l1_full_filt, features = c("TRGV9"), min.cutoff = 0, max.cutoff = 1,
            raster = TRUE, raster.dpi = c(900, 900), pt.size = 2)

In [None]:
# ggsave() without `plot` writes the most recent plot, i.e. the one above.
ggsave(filename = "../figures/Feature_unconventional/Trgv9_cd8.png", width = 14, height = 12, units = "cm")
ggsave(filename = "../figures/Feature_unconventional/Trgv9_cd8.svg", width = 14, height = 12, units = "cm")

In [None]:
dir.create("../figures/Feature_unconventional/", showWarnings = FALSE, recursive = TRUE)
FeaturePlot(cd8_l1_full_filt, features = c("TRGC1"), min.cutoff = 0, max.cutoff = 1,
            raster = TRUE, raster.dpi = c(900, 900), pt.size = 2)

In [None]:
ggsave(filename = "../figures/Feature_unconventional/Trgc1_cd8.png", width = 14, height = 12, units = "cm")
ggsave(filename = "../figures/Feature_unconventional/Trgc1_cd8.svg", width = 14, height = 12, units = "cm")

In [None]:
FeaturePlot(cd8_l1_full_filt, features = c("TRDC"), min.cutoff = 0, max.cutoff = 2,
            raster = TRUE, raster.dpi = c(900, 900), pt.size = 2)
ggsave(filename = "../figures/Feature_unconventional/Trdc_cd8.png", width = 14, height = 12, units = "cm")
ggsave(filename = "../figures/Feature_unconventional/Trdc_cd8.svg", width = 14, height = 12, units = "cm")

In [None]:
# Non-rasterised version for on-screen inspection only.
FeaturePlot(cd8_l1_full_filt, features = c("TRDC"), min.cutoff = 0, max.cutoff = 2)

In [None]:
# Frequency table for the full L1 object (with the relabelled manual
# annotation).
# NOTE(review): create_df4() is not defined in this file (presumably it comes
# from the sourced script); the columns used below are Experiment_ID,
# Condition, freq and annotations_manual.
df4 <- create_df4(cd8_l1_full_filt)

In [None]:
df4

In [None]:
# Frequency (%) of each annotated population per Condition, one facet per
# population, restricted to the four listed experiments, with a p-value for
# the comparison of the 1st and 2nd Condition levels.
p5 <- df4 %>%
    filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
    ggplot(aes(x = Condition, y = freq * 100)) +
    geom_violin(aes(fill = Condition), alpha = 0.3) +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.75,
                 color = "grey30") +
    scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
    geom_beeswarm(aes(fill = Condition),
                  size = 3, cex = 3,
                  color = "black", method = "center",
                  shape = 21) +
    facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
    ylab("Frequency") +
    xlab("Condition") +
    theme_classic() +
    ggtheme() +
    ggpubr::stat_compare_means(label.x = 1.5,
                               size = 7, vjust = -1, label = "p.format",
                               comparisons = list(c(1, 2))) +
    ggtheme() +
    # Axis starts at 0; the additive expansion at the top presumably leaves
    # room for the p-value label.
    scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
    theme(strip.background = element_blank(), panel.grid = element_blank()) +
    theme(axis.text = element_text(color = "black"),
          axis.text.x = element_text(angle = 90),
          axis.line = element_line(color = "black", size = 0.5),
          axis.ticks.x = element_blank()) +
    NoLegend() +
    ggtitle("Final data all groups")
# Number of annotation levels (not used by the plot above, which fixes ncol).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

In [None]:
# Create the output folder if needed and save p5 explicitly, so the file does
# not depend on which plot was drawn last in the session.
dir.create("../figures/subset_characterization/", showWarnings = FALSE, recursive = TRUE)
ggsave("../figures/subset_characterization/cd8_unc_subsets_in_condition.svg",
       plot = p5, width = 18, height = 12, units = "cm")

In [None]:
# Same plot as above without patient "116", testing the 2nd vs the 3rd
# Condition level with a paired test.
# A paired test matches observations by their row order within each group, so
# the rows are sorted by Patient_ID first to line up the samples of the same
# patient.
# NOTE(review): this assumes Patient_ID identifies the same individual in both
# compared conditions and that every remaining patient has a row in both -
# verify against create_df4().
p5 <- df4 %>%
    filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
    dplyr::filter(Patient_ID != "116") %>%
    dplyr::arrange(Patient_ID) %>%
    ggplot(aes(x = Condition, y = freq * 100)) +
    geom_violin(aes(fill = Condition), alpha = 0.3) +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.75,
                 color = "grey30") +
    scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
    geom_beeswarm(aes(fill = Condition),
                  size = 3, cex = 3,
                  color = "black", method = "center",
                  shape = 21) +
    facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
    ylab("Frequency") +
    xlab("Condition") +
    theme_classic() +
    ggtheme() +
    ggpubr::stat_compare_means(label.x = 1.5,
                               size = 7, vjust = -1, label = "p.format",
                               comparisons = list(c(2, 3)), paired = TRUE) +
    ggtheme() +
    scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
    theme(strip.background = element_blank(), panel.grid = element_blank()) +
    theme(axis.text = element_text(color = "black"),
          axis.text.x = element_text(angle = 90),
          axis.line = element_line(color = "black", size = 0.5),
          axis.ticks.x = element_blank()) +
    NoLegend() +
    ggtitle("Final data all groups")
# Number of annotation levels (not used by the plot above, which fixes ncol).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

# Frequencies and counts of all populations

In [None]:
cd8_l1_full_filt

In [None]:
# Per-sample cell counts for the annotations of all levels.
all_counts <- read_csv("../tables/populations_freq/all_levels_counts_cd8.csv")

In [None]:
all_counts$Experiment_ID %>% table()

In [None]:
all_counts

In [None]:
# Fraction of each annotation among all counts of the same sample and level.
# ungroup() drops the grouping again so it does not leak into df3/df4; with
# it in place, later select() calls silently re-added Sample_ID and Level.
df3 <- all_counts %>%
    group_by(Sample_ID, Level) %>%
    mutate(freq_from_total = n / sum(n)) %>%
    ungroup()

In [None]:
df3

In [None]:
## Set parent population

In [None]:
# Split the "---"-separated annotation into its first two components (a third
# one is discarded) while keeping the original column.
df3 <- separate(
    df3,
    annotations,
    into = c("annot_l1", "annot_l2", NA),
    sep = "---",
    remove = FALSE
)

# The parent of an L1 population is "CD8", of an L2 population its L1 part,
# and of an L3 population its "L1---L2" prefix.
df3 <- mutate(
    df3,
    Parent_annotation = case_when(
        Level == "L1" ~ "CD8",
        Level == "L2" ~ annot_l1,
        Level == "L3" ~ paste0(annot_l1, "---", annot_l2)
    )
)

In [None]:
df3

In [None]:
## For each level and each patient calculate the total count per parent population

In [None]:
# Distinct annotation labels present at level L1 and at level L2.
levels_l1 <- unique(pull(filter(all_counts, Level == "L1"), annotations))
levels_l2 <- unique(pull(filter(all_counts, Level == "L2"), annotations))

In [None]:

# Total cell count of every parent population per sample:
#   Level "L2" rows hold, per L1 population, the sum of its L2 children;
#   Level "L3" rows hold, per L2 population, the sum of its L3 children.
#
# Children are assigned to their parent through the exact Parent_annotation
# label computed above. The previous grepl()-based lookup used each level name
# as a regular expression and as a substring, so a name that is a prefix of
# another one, or that contains regex metacharacters, could pull in children
# of a different parent.
# As before, only parents that exist as an annotation one level up are kept.
df_sum_of_parent <- df3 %>%
    ungroup() %>%
    filter((Level == "L2" & Parent_annotation %in% levels_l1) |
           (Level == "L3" & Parent_annotation %in% levels_l2)) %>%
    group_by(Sample_ID, Level, Parent_annotation) %>%
    summarise(total_per_patient = sum(n), .groups = "drop") %>%
    dplyr::select(Sample_ID, total_per_patient, Level, Parent_annotation)

In [None]:
df_sum_of_parent

In [None]:
# Size before the join, for comparison with dim(df4) below.
dim(df3)

In [None]:
# Attach the parent totals (joined on the columns both tables share).
df4 <- left_join(df3, df_sum_of_parent)

In [None]:
dim(df4)

In [None]:
df4

In [None]:
# Fraction of the parent population.
df4$freq_from_parent <- df4$n / df4$total_per_patient

In [None]:
df4

In [None]:
# Percent versions of both frequencies.
df4$pct_from_total <- 100 * df4$freq_from_total
df4$pct_from_parent <- 100 * df4$freq_from_parent


In [None]:
# Missing parent-based values become 0, except for L1 rows, which have no
# parent total and stay NA.
freq_missing <- is.na(df4$freq_from_parent) & df4$Level != "L1"
df4$freq_from_parent <- ifelse(freq_missing, 0, df4$freq_from_parent)
pct_missing <- is.na(df4$pct_from_parent) & df4$Level != "L1"
df4$pct_from_parent <- ifelse(pct_missing, 0, df4$pct_from_parent)

## Ordered frequency plots

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Percent of parent per Condition, one facet per annotation; "---" in the
# facet labels is replaced by a line break. Box plot with the individual
# values drawn on top (the jitter width and height are both 0).
ggplot(df4, aes(x = Condition, y = pct_from_parent)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(aes(color = Condition),
                binaxis = "y",
                position = position_jitter(width = 0, height = 0),
                size = 1, stackdir = "center") +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    facet_wrap(~factor(annotations,
                       labels = gsub("---", "\n", levels(factor(annotations)))),
               scales = "free", ncol = 6) +
    ylim(0, NA) +
    ggpubr::stat_compare_means(label.x = 1.2, label.y.npc = "top",
                               size = 3, vjust = 0.3, label = "p.format") +
    theme(plot.title = element_text(hjust = 0.5, size = 22),
          axis.line = element_line(colour = "black"),
          axis.ticks = element_line(colour = "black")) +
    ggtitle("CD8 Pct from parent")

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Same layout for the percent of all cells at the same level.
ggplot(df4, aes(x = Condition, y = pct_from_total)) +
    geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(aes(color = Condition),
                binaxis = "y",
                position = position_jitter(width = 0, height = 0),
                size = 1, stackdir = "center") +
    theme_classic() +
    xlab("") +
    ylab("Value") +
    facet_wrap(~factor(annotations,
                       labels = gsub("---", "\n", levels(factor(annotations)))),
               scales = "free", ncol = 6) +
    ylim(0, NA) +
    ggpubr::stat_compare_means(label.x = 1.2, label.y.npc = "top",
                               size = 3, vjust = 0.3, label = "p.format") +
    theme(plot.title = element_text(hjust = 0.5, size = 22),
          axis.line = element_line(colour = "black"),
          axis.ticks = element_line(colour = "black")) +
    ggtitle("CD8 Pct from total")

## Plot of differences in Conditions

In [None]:
# Every annotation is tested; comparisons_to_test lists the Condition labels
# that the pairwise comparisons below pick by position.
annotations_to_test <- unique(df4$annotations)
comparisons_to_test <- c("Ctrl T0", "Dia T0", "Dia T1")

In [None]:
annotations_to_test

### Freq from total

In [None]:
# Wilcoxon rank-sum comparison of `value` between the two groups found in
# group_df$Condition, returned as a one-row data frame.
#   group_df  data frame with columns Condition (two groups) and value
#   annot     annotation label stored in `name`
#   label     text stored in `comparison`
#   dia_idx, ctrl_idx
#             positions, in group_by(Condition) order, of the group means
#             reported as mean_dia and mean_ctrl
# NOTE(review): wilcox.test() orients `estimate` and the confidence interval
# by the order of the Condition groups, not by the wording of `label`;
# confirm that the sign is read correctly for the "Dia ... vs Ctrl ..." rows.
compare_two_groups <- function(group_df, annot, label, dia_idx, ctrl_idx) {
    wcx <- wilcox.test(group_df$value ~ group_df$Condition, conf.int = TRUE)
    group_means <- group_df %>%
        group_by(Condition) %>%
        summarise(mean = mean(value), sd = sd(value))
    data.frame(name = annot,
               comparison = label,
               estimate = wcx$estimate,
               pval = wcx$p.value,
               mean_dia = group_means$mean[dia_idx],
               mean_ctrl = group_means$mean[ctrl_idx],
               upper = wcx$conf.int[2],
               lower = wcx$conf.int[1]) %>%
        mutate(ratio = (mean_dia) / (mean_ctrl))
}

# Pairwise Condition comparisons: j and k index comparisons_to_test; dia_idx
# and ctrl_idx say which of the two group means is reported under which name.
condition_pairs <- list(
    list(j = 2, k = 3, dia_idx = 1, ctrl_idx = 2),  # Dia T0 vs Dia T1
    list(j = 2, k = 1, dia_idx = 2, ctrl_idx = 1),  # Dia T0 vs Ctrl T0
    list(j = 3, k = 1, dia_idx = 2, ctrl_idx = 1)   # Dia T1 vs Ctrl T0
)

df_final <- NULL
# seq_along() skips the loop for an empty vector, unlike 1:length().
for (i in seq_along(annotations_to_test)) {

    for (cmp in condition_pairs) {
        cond_pair <- comparisons_to_test[c(cmp$j, cmp$k)]
        df <- df4 %>%
            filter(annotations == annotations_to_test[i] & Condition %in% cond_pair) %>%
            dplyr::select(Condition, value = pct_from_total) %>%
            mutate(value = as.numeric(value))
        df_all <- compare_two_groups(df, annotations_to_test[i],
                                     paste(cond_pair[1], "vs", cond_pair[2]),
                                     cmp$dia_idx, cmp$ctrl_idx)
        # Rows are appended one at a time so the row names come out the same
        # as with the original incremental rbind().
        df_final <- if (is.null(df_final)) df_all else rbind(df_final, df_all)
    }

    ## Comparison of Dia - partial remission vs. no remission
    df <- df4 %>%
        filter(annotations == annotations_to_test[i] & Disease == "Dia" & !is.na(Condition2)) %>%
        dplyr::select(Condition = Condition2, value = pct_from_total) %>%
        mutate(value = as.numeric(value))
    df_all <- compare_two_groups(df, annotations_to_test[i], "PR_0 vs PR_1", 1, 2)
    df_final <- rbind(df_final, df_all)
}

In [None]:
df_final  %>% arrange(pval)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Classify each estimate: "1" when the estimate and the upper bound are both
# below zero, "2" when the estimate and the lower bound are both above zero,
# "3" otherwise.
forest_df <- mutate(
    df_final,
    color = ifelse(estimate < 0 & upper < 0,
                   "1",
                   ifelse(estimate > 0 & lower > 0, "2", "3"))
)

# Point estimate with interval per annotation, one panel per comparison.
# NOTE(review): the x-axis label says "Probability", but `estimate` is taken
# from wilcox.test(conf.int = TRUE) - confirm the intended wording.
ggplot(forest_df, aes(estimate, name, color = color)) +
    geom_vline(xintercept = 0, color = "gray75") +
    geom_linerange(aes(xmin = lower, xmax = upper),
                   size = 1.5,
                   alpha = 0.5) +
    geom_point(size = 4) +
    theme_minimal(base_size = 16) +
    scale_color_manual(values = c("green4", "red3", "grey"), guide = "none") +
    facet_wrap(~comparison, ncol = 4) +
    labs(title = "", y = NULL,
         x = "Probability \n(95% Confidence Intervals)") +
    theme(axis.text.y = element_text(hjust = 0, size = 18),
          panel.grid = element_blank()) +
    ggtitle("Pct from Total")

In [None]:
# Export the comparison table.
write.csv(df_final, "../tables/populations_freq/cd8_pct_from_total.csv")

### Freq from parent

In [None]:
# Annotations for the parent-based tests: everything from the third distinct
# annotation onwards.
# NOTE(review): the first two entries are dropped by position (presumably
# populations without a meaningful parent frequency) - confirm this still
# matches the order of df4$annotations.
annotations_to_test <- unique(df4$annotations)
annotations_to_test <- annotations_to_test[seq(3, length(annotations_to_test))]

In [None]:
annotations_to_test

In [None]:
# Start the result table for the parent-based comparisons from scratch.
df_final <- NULL

In [None]:
# Frequency-from-parent statistics: for every tested annotation run four
# two-group Wilcoxon comparisons on `pct_from_parent` and collect one result
# row per (annotation, comparison) in `df_final`.
df5 <- df4 %>% filter(Level != "L1")

# Rows of df5 for one annotation, restricted to the given Condition values and
# reduced to the two columns the test needs (Condition, numeric value).
parent_values_for <- function(target_annotation, target_conditions) {
  df5 %>%
    filter(annotations == target_annotation & Condition %in% target_conditions) %>%
    dplyr::select(Condition, value = pct_from_parent) %>%
    mutate(value = as.numeric(value))
}

# Run wilcox.test(value ~ Condition) and return a one-row data frame holding
# the estimate and confidence interval reported by wilcox.test(conf.int = TRUE),
# the p-value, the two group means and their ratio.
# `dia_idx` / `ctrl_idx` select which row of the per-Condition summary is
# reported as mean_dia / mean_ctrl (this depends on how Condition sorts).
compare_two_groups <- function(df, annotation, label, dia_idx, ctrl_idx) {
  wcx <- wilcox.test(df$value ~ df$Condition, conf.int = TRUE)
  group_means <- df %>%
    group_by(Condition) %>%
    summarise(mean = mean(value), sd = sd(value))
  data.frame(name = annotation,
             comparison = label,
             estimate = wcx$estimate,
             pval = wcx$p.value,
             mean_dia = group_means$mean[dia_idx],
             mean_ctrl = group_means$mean[ctrl_idx],
             upper = wcx$conf.int[2],
             lower = wcx$conf.int[1]) %>%
    mutate(ratio = (mean_dia) / (mean_ctrl))
}

# Results are gathered in a list owned by this cell, so the outcome no longer
# depends on `df_final` having been reset to NULL in a separate cell.
parent_results <- list()

# NOTE(review): the first four entries of annotations_to_test are skipped, as
# in the original `5:length(...)` loop - confirm this is intended. Unlike
# `5:length(...)`, this index vector is empty (not descending) when there are
# fewer than five annotations.
for (i in seq_along(annotations_to_test)[-seq_len(4)]) {
  print(i)
  annotation <- annotations_to_test[i]

  ## Comparison Dia T0 vs Dia T1
  parent_results[[length(parent_results) + 1]] <- compare_two_groups(
    parent_values_for(annotation, comparisons_to_test[c(2, 3)]),
    annotation,
    label = "Dia T0 vs Dia T1",
    dia_idx = 1, ctrl_idx = 2
  )

  ## Comparison of Dia vs controls in both times
  for (j in c(2, 3)) {
    k <- 1
    parent_results[[length(parent_results) + 1]] <- compare_two_groups(
      parent_values_for(annotation,
                        c(comparisons_to_test[j], comparisons_to_test[k])),
      annotation,
      label = paste(comparisons_to_test[j], "vs", comparisons_to_test[k]),
      dia_idx = 2, ctrl_idx = 1
    )
  }

  ## Comparison of Dia - partial remission vs. no remission
  remission_values <- df5 %>%
    filter(annotations == annotation & Disease == "Dia" & !is.na(Condition2)) %>%
    dplyr::select(Condition = Condition2, value = pct_from_parent) %>%
    mutate(value = as.numeric(value))
  parent_results[[length(parent_results) + 1]] <- compare_two_groups(
    remission_values,
    annotation,
    label = "PR_0 vs PR_1",
    dia_idx = 1, ctrl_idx = 2
  )
}

df_final <- do.call(rbind, parent_results)

df_final  %>% arrange(pval)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Forest plot of the frequency-from-parent tests: one panel per comparison,
# one row per annotation, point = estimate, bar = confidence interval.
# Colour category: "1" = estimate and upper bound both below 0,
#                  "2" = estimate and lower bound both above 0,
#                  "3" = everything else.
# NOTE(review): the x-axis label says "Probability", but `estimate` is the
# value returned by wilcox.test(conf.int = TRUE) - confirm the wording.
df_final %>%
  mutate(color = ifelse(estimate < 0 & upper < 0,
                        "1",
                        ifelse(estimate > 0 & lower > 0, "2", "3"))) %>%
  ggplot(aes(estimate, name, color = color)) +
  geom_vline(xintercept = 0, color = "gray75") +
  geom_linerange(aes(xmin = lower, xmax = upper),
                 size = 1.5,
                 alpha = 0.5) +
  geom_point(size = 4) +
  theme_minimal(base_size = 16) +
  # Named values tie each colour to its category. With an unnamed vector the
  # colours are handed out by position to whichever categories are present,
  # so a missing category silently shifts the colours of the others.
  scale_color_manual(values = c("1" = "green4", "2" = "red3", "3" = "grey"),
                     guide = "none") +
  facet_wrap(~comparison, ncol = 4) +
  labs(title = "", y = NULL,
       x = "Probability \n(95% Confidence Intervals)") +
  theme(axis.text.y = element_text(hjust = 0, size = 18),
        panel.grid = element_blank())

write.csv(df_final, "../tables/populations_freq/cd8_pct_from_parent.csv")

# PCA populations

In [None]:
model_table

In [None]:
colnames(model_table)

In [None]:
# Matrix for the PCA: columns 11-51 of model_table, labelled by Sample_ID and
# transposed (variables in rows, samples in columns).
pca_input <- dplyr::select(model_table, 11:51, Sample_ID)
pca_cd8_cluster <- t(column_to_rownames(pca_input, var = "Sample_ID"))

In [None]:
pca_cd8_cluster

In [None]:
pca_cd8_cluster  %>% as.matrix

In [None]:
library("factoextra")
library("FactoMineR")

In [None]:
res.pca <- PCA(t(pca_cd8_cluster),  graph = FALSE, ncp = 20)

In [None]:
fviz_screeplot(res.pca, addlabels = TRUE, ylim = c(0, 50), ncp = 20)

In [None]:
res.pca$ind$coord

In [None]:
var <- get_pca_var(res.pca)
head(var$contrib)

In [None]:
fviz_pca_var(res.pca, col.var="contrib",
             gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
             repel = TRUE # Avoid text overlapping
             )

In [None]:
colnames(pca_cd8_cluster)

In [None]:
model_table  %>% colnames

In [None]:
metadata_pca  <- model_table  %>% 
dplyr::select(1:10)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
fviz_pca_ind(res.pca, col.ind = metadata_pca$Experiment_ID)

In [None]:
fviz_pca_ind(res.pca, col.ind = metadata_pca$Age_group)

In [None]:
fviz_pca_ind(res.pca, col.ind = metadata_pca$Experiment_ID)

In [None]:
fviz_pca_ind(res.pca, col.ind = metadata_pca$Disease, axes = c(1,2))

In [None]:
fviz_pca_ind(res.pca, col.ind = metadata_pca$Age_group, axes = c(3,4))

In [None]:
fviz_pca_ind(res.pca, col.ind = metadata_pca$Disease, axes = c(3,4))

In [None]:
df_pca  <- res.pca$ind$coord

In [None]:
df_pca  <- cbind(df_pca, metadata_pca)

In [None]:
df_pca

In [None]:
df_pca   <- df_pca %>% pivot_longer(starts_with("Dim"), names_to = "Dim", values_to = "value")

In [None]:
df_pca

In [None]:
options(repr.plot.width = 26, repr.plot.height = 25)

# One panel per PCA dimension: box plot of the sample coordinates by Disease,
# with the individual samples overlaid as jittered points.
# NOTE(review): the y-axis is labelled "Frequency" although `value` holds the
# PCA coordinates pivoted from res.pca$ind$coord - confirm the label.
df_pca %>%
  ggplot(aes(x = Disease, y = value)) +
  geom_boxplot(outlier.shape = NA,
               alpha = 0.5, width = 0.9, aes(fill = Disease)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # `binaxis` / `stackdir` are geom_dotplot arguments; geom_jitter ignored them
  # with an "unknown parameters" warning, so they are no longer passed here.
  geom_jitter(position = position_jitter(width = 0.1, height = 0),
              size = 3, aes(fill = Disease, shape = Disease), color = "black") +
  facet_wrap(~Dim, scales = "free", ncol = 4) +
  scale_shape_manual(values = c(21, 22, 22)) +
  scale_fill_manual(values = c("lightsteelblue1", "rosybrown1", "purple")) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(label.x = 1.1, label.y.npc = 1, size = 7.025,
                             vjust = 0.3, label = "p.format") +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(axis.text = element_text(color = "black"),
        axis.line = element_line(color = "black"),
        axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.width = 26, repr.plot.height = 25)

# Same per-dimension box plot as above in the notebook, restricted to rows
# with Time == "T0".
# NOTE(review): the y-axis is labelled "Frequency" although `value` holds the
# PCA coordinates pivoted from res.pca$ind$coord - confirm the label.
df_pca %>%
  filter(Time == "T0") %>%
  ggplot(aes(x = Disease, y = value)) +
  geom_boxplot(outlier.shape = NA,
               alpha = 0.5, width = 0.9, aes(fill = Disease)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # `binaxis` / `stackdir` are geom_dotplot arguments; geom_jitter ignored them
  # with an "unknown parameters" warning, so they are no longer passed here.
  geom_jitter(position = position_jitter(width = 0.1, height = 0),
              size = 3, aes(fill = Disease, shape = Disease), color = "black") +
  facet_wrap(~Dim, scales = "free", ncol = 4) +
  scale_shape_manual(values = c(21, 22, 22)) +
  scale_fill_manual(values = c("lightsteelblue1", "rosybrown1", "purple")) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(label.x = 1.1, label.y.npc = 1, size = 7.025,
                             vjust = 0.3, label = "p.format") +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(axis.text = element_text(color = "black"),
        axis.line = element_line(color = "black"),
        axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.width = 26, repr.plot.height = 25)

# Per-dimension box plot comparing the "Dia T1" and "Ctrl T0" conditions.
# NOTE(review): the y-axis is labelled "Frequency" although `value` holds the
# PCA coordinates pivoted from res.pca$ind$coord - confirm the label.
df_pca %>%
  filter(Condition %in% c("Dia T1", "Ctrl T0")) %>%
  ggplot(aes(x = Condition, y = value)) +
  geom_boxplot(outlier.shape = NA,
               alpha = 0.5, width = 0.9, aes(fill = Disease)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # `binaxis` / `stackdir` are geom_dotplot arguments; geom_jitter ignored them
  # with an "unknown parameters" warning, so they are no longer passed here.
  geom_jitter(position = position_jitter(width = 0.1, height = 0),
              size = 3, aes(fill = Disease, shape = Disease), color = "black") +
  facet_wrap(~Dim, scales = "free", ncol = 4) +
  scale_shape_manual(values = c(21, 22, 22)) +
  scale_fill_manual(values = c("lightsteelblue1", "rosybrown1", "purple")) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(label.x = 1.1, label.y.npc = 1, size = 7.025,
                             vjust = 0.3, label = "p.format") +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(axis.text = element_text(color = "black"),
        axis.line = element_line(color = "black"),
        axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.width = 26, repr.plot.height = 25)

# Per-dimension box plot of the "Dia T0" samples split by Condition2.
# NOTE(review): the y-axis is labelled "Frequency" although `value` holds the
# PCA coordinates pivoted from res.pca$ind$coord - confirm the label.
df_pca %>%
  filter(Condition %in% c("Dia T0")) %>%
  ggplot(aes(x = Condition2, y = value)) +
  geom_boxplot(outlier.shape = NA,
               alpha = 0.5, width = 0.9, aes(fill = Disease)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # `binaxis` / `stackdir` are geom_dotplot arguments; geom_jitter ignored them
  # with an "unknown parameters" warning, so they are no longer passed here.
  geom_jitter(position = position_jitter(width = 0.1, height = 0),
              size = 3, aes(fill = Disease, shape = Disease), color = "black") +
  facet_wrap(~Dim, scales = "free", ncol = 4) +
  scale_shape_manual(values = c(21, 22, 22)) +
  scale_fill_manual(values = c("lightsteelblue1", "rosybrown1", "purple")) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(label.x = 1.1, label.y.npc = 1, size = 7.025,
                             vjust = 0.3, label = "p.format") +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(axis.text = element_text(color = "black"),
        axis.line = element_line(color = "black"),
        axis.text.x = element_text(angle = 90))

# Populations - correlations with C-peptide

In [None]:
fast  <- cd8_l1_full_filt@meta.data  %>% dplyr::select(Sample_ID, fasting_cpept_T1)  %>% unique

In [None]:
model_table

In [None]:
populations  <- model_table  %>% 
 left_join(fast)

In [None]:
populations

In [None]:
# Correlation test between one column of the global `populations` table and
# its column 52 (labelled fasting_cpept_T1 by this function).
#
# Arguments:
#   i        Column position in `populations` of the variable to test.
#   n_tests  Bonferroni multiplier for `padj`. The default, 41, is the number
#            of columns in `11:51`, the range this notebook maps over; the
#            previous hard-coded factor was 40.
# Returns a one-row data frame: population (column name), cor (estimate from
# cor.test), pval, and padj = min(1, pval * n_tests).
calc_correlation <- function(i, n_tests = 41L) {
  df <- data.frame(value = populations[[i]],
                   fasting_cpept_T1 = populations[[52]])
  # Drop samples without a C-peptide value. The former `filter(!is.na(52))`
  # tested the literal 52 and therefore never removed a row.
  df <- df[!is.na(df$fasting_cpept_T1), , drop = FALSE]
  # Named `cor_res` so the local result does not shadow stats::cor().
  cor_res <- cor.test(df$value, df$fasting_cpept_T1)
  data.frame(population = colnames(populations)[i],
             cor = cor_res$estimate,
             pval = cor_res$p.value,
             padj = min(1, cor_res$p.value * n_tests))
}

In [None]:
test  <- future_map(11:51, calc_correlation)

In [None]:
test2  <- bind_rows(test)

In [None]:
test2  %>% arrange(pval)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 4)

# Scatter plot with a linear fit for the ten populations that have the
# smallest p-values in test2 (x = population value, y = column 52).
ranked_populations <- pull(arrange(test2, pval), population)
for (i in ranked_populations[1:10]) {
  df2 <- dplyr::select(populations,
                       which(colnames(populations) == i),
                       fasting_cpept_T1 = 52)
  colnames(df2) <- c("value", "fasting_cpept_T1")
  p <- ggplot(df2, aes(x = value, y = fasting_cpept_T1)) +
    geom_point(shape = 16, size = 2) +
    geom_smooth(method = lm) +
    ggtitle(i)
  print(p)
}

In [None]:
cd8_l1_full_filt@meta.data  %>% group_by(Patient_ID, Experiment_ID, Disease)  %>% tally  %>% dplyr::select(-n)  %>% 
ungroup  %>% group_by(Experiment_ID, Disease)  %>% tally  %>% arrange(Experiment_ID)

# Populations - correlations with age

In [None]:
# Correlation test between one column of the global `populations` table and
# its column 6 (labelled age by this function).
#
# Arguments:
#   i        Column position in `populations` of the variable to test.
#   n_tests  Bonferroni multiplier for `padj`. The default, 41, is the number
#            of columns in `11:51`, the range this notebook maps over; the
#            previous hard-coded factor was 40.
# Returns a one-row data frame: population (column name), cor (estimate from
# cor.test), pval, and padj = min(1, pval * n_tests).
calc_correlation <- function(i, n_tests = 41L) {
  df <- data.frame(value = populations[[i]],
                   age = populations[[6]])
  # Drop samples without an age value. The former `filter(!is.na(6))` tested
  # the literal 6 and therefore never removed a row.
  df <- df[!is.na(df$age), , drop = FALSE]
  # Named `cor_res` so the local result does not shadow stats::cor().
  cor_res <- cor.test(df$value, df$age)
  data.frame(population = colnames(populations)[i],
             cor = cor_res$estimate,
             pval = cor_res$p.value,
             padj = min(1, cor_res$p.value * n_tests))
}

In [None]:
test  <- future_map(11:51, calc_correlation)

In [None]:
test2  <- bind_rows(test)

In [None]:
test2  %>% arrange(pval)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 4)

# Scatter plot with a linear fit for the ten populations that have the
# smallest p-values in test2 (x = population value, y = column 6).
ranked_populations <- pull(arrange(test2, pval), population)
for (i in ranked_populations[1:10]) {
  df2 <- dplyr::select(populations,
                       which(colnames(populations) == i),
                       age = 6)
  colnames(df2) <- c("value", "age")
  p <- ggplot(df2, aes(x = value, y = age)) +
    geom_point(shape = 16, size = 2) +
    geom_smooth(method = lm) +
    ggtitle(i)
  print(p)
}

# DE genes in CD8 populations

In [None]:
cd8_l3_list  <- list(cd8_l3_tem, cd8_l3_naive, cd8_l3_tcm, cd8_l3_temra, cd8_l3_prolif, 
                    cd8_l2_nk, cd8_l2_unc, cd8_l2_subcluster, cd8_l1_full_filt)

names(cd8_l3_list) <- c("cd8_l3_tem", "cd8_l3_naive", "cd8_l3_tcm", "cd8_l3_temra", "cd8_l3_prolif",
                       "cd8_l2_nk", "cd8_l2_unc", "cd8_l2_subcluster", "cd8_l1_full_filt")

In [None]:
# Per-patient remission label, taken from the "Dia T0" cells only.
# group_by + tally collapses the cells to one row per
# (Patient_ID, part_remission_y_n) pair; Condition2 is "PR_" followed by that
# flag. The count column and the raw flag are then dropped, leaving
# Patient_ID and Condition2.
# NOTE(review): a patient with more than one distinct part_remission_y_n value
# would yield several rows here and duplicate cells in the later join - verify.
md_for_remission  <- cd8_l1_full_filt@meta.data  %>% 
dplyr::filter(Condition %in% c("Dia T0"))  %>% 
group_by(Patient_ID, part_remission_y_n)  %>% 
tally %>% mutate(Condition2 = paste0("PR_",part_remission_y_n))  %>% 
dplyr::select(-n, -part_remission_y_n)

In [None]:
cd8_l1_full_filt@meta.data  %>% group_by(Patient_ID, Disease, Sample_ID, Experiment_ID)  %>% tally  %>% 
ungroup  %>% 
dplyr::select(-n)  %>% 
group_by(Experiment_ID, Disease)  %>%
tally()

In [None]:
md_for_remission

In [None]:
# Attach the per-patient remission label to every object in cd8_l3_list as
# Condition2 = "<PR label> <Time>"; any value containing "NA" becomes NA.
for (i in seq_along(cd8_l3_list)) {
  obj <- cd8_l3_list[[i]]

  # Remove a Condition2 left over from an earlier run so the join key stays
  # Patient_ID only.
  obj$Condition2 <- NULL

  md <- obj@meta.data %>%
    left_join(md_for_remission, by = "Patient_ID") %>%
    mutate(Condition2 = paste(Condition2, Time)) %>%
    mutate(Condition2 = ifelse(grepl(Condition2, pattern = "NA"),
                               NA_character_, Condition2))

  # The join must keep exactly one row per cell; more rows would mean a
  # patient appears several times in md_for_remission.
  stopifnot(nrow(md) == ncol(obj))

  # dplyr joins drop row names; restore the cell barcodes before the table is
  # put back, so the object never holds metadata with wrong row names.
  rownames(md) <- colnames(obj)
  obj@meta.data <- md

  print(obj$Condition2 %>% table)
  cd8_l3_list[[i]] <- obj
}

### Remission and non-remission in T0 vs T1 and Ketoacidosis

In [None]:
# Per-patient ketoacidosis label, taken from the "Dia T0" cells only.
# group_by + tally collapses the cells to one row per (Patient_ID, ph_man)
# pair; Ketoacidosis is "Keto_1" when ph_man < 7.3 and "Keto_0" otherwise
# (NA when ph_man is NA). The raw pH and the count column are then dropped.
md_for_ketoacidosis  <- cd8_l1_full_filt@meta.data  %>% 
dplyr::filter(Condition %in% c("Dia T0"))  %>% 
group_by(Patient_ID, ph_man)  %>% 
tally %>% mutate(Ketoacidosis = ifelse(ph_man<7.3,"Keto_1","Keto_0"))  %>% 
dplyr::select(-ph_man, -n)

In [None]:
md_for_ketoacidosis

In [None]:
# Attach the per-patient ketoacidosis label to every object in cd8_l3_list and
# derive Keto_Time = "<Ketoacidosis> <Time>"; any value containing "NA"
# becomes NA.
for (i in seq_along(cd8_l3_list)) {
  obj <- cd8_l3_list[[i]]

  md <- obj@meta.data
  # Drop a Ketoacidosis column left over from an earlier run. Without this a
  # re-run would join on Patient_ID *and* the stale Ketoacidosis column.
  md$Ketoacidosis <- NULL

  md <- md %>%
    left_join(md_for_ketoacidosis, by = "Patient_ID") %>%
    mutate(Keto_Time = paste(Ketoacidosis, Time)) %>%
    mutate(Keto_Time = ifelse(grepl(Keto_Time, pattern = "NA"),
                              NA_character_, Keto_Time))

  # The join must keep exactly one row per cell; more rows would mean a
  # patient appears several times in md_for_ketoacidosis.
  stopifnot(nrow(md) == ncol(obj))

  # dplyr joins drop row names; restore the cell barcodes before the table is
  # put back into the object.
  rownames(md) <- colnames(obj)
  obj@meta.data <- md

  print(obj$Ketoacidosis %>% table)
  cd8_l3_list[[i]] <- obj
}

In [None]:
cd8_l3_list[[i]]$Keto_Time  %>% table

### Ketoacidosis in T0

In [None]:
Conditions  <- list(c("Dia T0", "Ctrl T0"),
                    c("Dia T0", "Dia T1"),
                    c("Dia T1", "Ctrl T0"),
                    c("PR_0 T0", "PR_1 T0"),
                    c("PR_0 T1", "PR_1 T1"),
                    c("PR_0 T0", "PR_0 T1"),
                    c("PR_1 T0", "PR_1 T1"),
                    c("Keto_1 T0", "Keto_0 T0"))

In [None]:
Conditions  %>% length

In [None]:
dataset_list  <- cd8_l3_list

In [None]:
# Single-cell marker table for one pairwise comparison in one dataset.
#
# Arguments:
#   j              Index into the global `Conditions` list; the two labels of
#                  that entry are the groups compared.
#   dataset_index  Index into the global `dataset_list`. Defaults to the
#                  global loop variable `i`, which the function previously
#                  read implicitly.
# Returns the FindAllMarkers(only.pos = TRUE) results for the "RNA" and
# "CollecTRI" assays stacked in one data frame with the extra columns
# `source`, `test_type` and `dataset`, or NULL when neither assay yields a
# marker (bind_rows() skips NULL entries).
Create_Markers_DataFrame <- function(j, dataset_index = i) {
  print("#######################")
  print(dataset_index)
  print(j)
  Condition_1 <- Conditions[[j]][1]
  Condition_2 <- Conditions[[j]][2]

  seurat_obj <- dataset_list[[dataset_index]]

  # Remission and ketoacidosis comparisons use their own label columns;
  # copy the relevant one into `Condition` so the code below is uniform.
  if (grepl(Condition_1, pattern = "PR")) {
    seurat_obj$Condition <- seurat_obj$Condition2
  }
  if (grepl(Condition_1, pattern = "Keto")) {
    seurat_obj$Condition <- seurat_obj$Keto_Time
  }

  ## Keep only the two compared groups from the listed experiments
  ds <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) &
                 Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20"))
  print(ds$Condition %>% table)

  # Markers between the two Condition groups for one assay. The result is
  # labelled only when it has rows: assigning a column to an empty data frame
  # raises an error.
  find_assay_markers <- function(assay, source_label) {
    DefaultAssay(ds) <- assay
    Idents(ds) <- ds$Condition
    res <- FindAllMarkers(ds, only.pos = TRUE)
    if (is.data.frame(res) && nrow(res) > 0) {
      res$source <- source_label
      rownames(res) <- NULL
    }
    res
  }

  ## scRNAseq - RNA
  markers_sc <- find_assay_markers("RNA", "scRNAseq_RNA")
  print(paste("DE RNA: ", nrow(markers_sc)))

  ## scRNAseq - collecTRI
  markers_sc2 <- find_assay_markers("CollecTRI", "scRNAseq_collecTRI")
  print(paste("DE CollecTRI: ", nrow(markers_sc2)))

  non_empty <- Filter(function(x) is.data.frame(x) && nrow(x) > 0,
                      list(markers_sc, markers_sc2))
  if (length(non_empty) == 0) {
    return(NULL)
  }
  markers2 <- do.call(rbind, non_empty)

  markers2$test_type <- paste(Conditions[[j]][1], "vs", Conditions[[j]][2])
  print("..")
  markers2$dataset <- names(dataset_list)[dataset_index]
  markers2
}


In [None]:
# Run every comparison in `Conditions` on every dataset and stack the results
# into `all_markers`.
# `i` must stay a top-level loop variable: Create_Markers_DataFrame looks it
# up to pick the dataset.
marker_tables <- vector("list", length(dataset_list))

for (i in seq_along(dataset_list)) {
  print("######################################################################")

  # seq_along(Conditions) replaces the hard-coded 1:8, so adding or removing
  # a comparison needs no change here.
  mrk <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)
  mrk <- bind_rows(mrk)

  marker_tables[[i]] <- mrk
}

all_markers <- bind_rows(marker_tables)

## DESeq2

In [None]:
Conditions  <- list(c("Dia T0", "Ctrl T0"),
                    c("Dia T0", "Dia T1"),
                    c("Dia T1", "Ctrl T0"),
                    c("PR_0 T0", "PR_1 T0"),
                    c("PR_0 T1", "PR_1 T1"),
                    c("PR_0 T0", "PR_0 T1"),
                    c("PR_1 T0", "PR_1 T1"),
                    c("Keto_1 T0", "Keto_0 T0"))

Conditions  %>% length

dataset_list  <- cd8_l3_list

# Function to calculate markers for a subsetted dataset for given conditions

In [None]:
# Pseudobulk (per-sample) DESeq2 marker table for one pairwise comparison in
# one dataset.
#
# Arguments:
#   j              Index into the global `Conditions` list; the two labels of
#                  that entry are the groups compared.
#   dataset_index  Index into the global `dataset_list`. Defaults to the
#                  global loop variable `i`, which the function previously
#                  read implicitly.
# Returns the FindAllMarkers(test.use = "DESeq2") result with the extra
# columns `source`, `test_type` and `dataset`. When no marker is found a
# single placeholder row with NA statistics and empty cluster/gene is
# returned instead.
Create_Markers_DataFrame <- function(j, dataset_index = i) {
  print("#######################")
  print(dataset_index)
  print(j)

  Condition_1 <- Conditions[[j]][1]
  Condition_2 <- Conditions[[j]][2]

  seurat_obj <- dataset_list[[dataset_index]]

  # Remission and ketoacidosis comparisons use their own label columns;
  # copy the relevant one into `Condition` so the code below is uniform.
  if (grepl(Condition_1, pattern = "PR")) {
    seurat_obj$Condition <- seurat_obj$Condition2
  }
  if (grepl(Condition_1, pattern = "Keto")) {
    seurat_obj$Condition <- seurat_obj$Keto_Time
  }

  ## Keep only the two compared groups from the listed experiments
  ds <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) &
                 Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20"))
  print(ds$Condition %>% table)

  ## DESeq2
  # One metadata row per sample, used to annotate the aggregated object.
  patient_metadata2 <- ds@meta.data %>%
    dplyr::select(Sample_ID, Patient_ID, Condition, Disease, Time, Experiment_ID) %>%
    unique %>% ungroup
  avgexp <- AggregateExpression(ds,
                                return.seurat = TRUE, group.by = "Sample_ID",
                                assay = "RNA", slot = "counts")
  # NOTE(review): assumes the aggregated column names are the plain numeric
  # Sample_ID values; as.numeric() yields NA otherwise - confirm.
  avgexp$Sample_ID <- as.numeric(colnames(avgexp))
  avgexp@meta.data <- avgexp@meta.data %>% left_join(patient_metadata2)
  # The join drops row names; restore the sample names.
  rownames(avgexp@meta.data) <- colnames(avgexp)

  Idents(avgexp) <- avgexp$Condition

  mrk_deseq <- FindAllMarkers(avgexp, min.pct = 0.05, test.use = "DESeq2",
                              only.pos = TRUE)

  print(paste("DE RNA: ", nrow(mrk_deseq)))

  if (is.data.frame(mrk_deseq) && nrow(mrk_deseq) > 0) {
    markers2 <- mrk_deseq
  } else {
    # Placeholder row; NA_real_ keeps the statistic columns double, matching
    # the type of real marker tables.
    markers2 <- data.frame(p_val = NA_real_, avg_log2FC = NA_real_,
                           pct.1 = NA_real_, pct.2 = NA_real_,
                           p_val_adj = NA_real_, cluster = "", gene = "")
  }
  markers2$source <- "DESeq2"
  rownames(markers2) <- NULL
  markers2$test_type <- paste(Conditions[[j]][1], "vs", Conditions[[j]][2])
  print("..")
  markers2$dataset <- names(dataset_list)[dataset_index]
  markers2
}

In [None]:
# Run every comparison in `Conditions` on every dataset with the DESeq2
# version of Create_Markers_DataFrame.
# The result is stored in `all_markers_deseq` rather than `all_markers`, so
# the single-cell marker table that later cells export and filter under the
# name `all_markers` is not overwritten when the notebook runs top to bottom.
# `i` must stay a top-level loop variable: Create_Markers_DataFrame looks it
# up to pick the dataset.
deseq_tables <- vector("list", length(dataset_list))

for (i in seq_along(dataset_list)) {
  print("######################################################################")

  mrk <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)
  mrk <- bind_rows(mrk)

  deseq_tables[[i]] <- mrk
}

all_markers_deseq <- bind_rows(deseq_tables)

In [None]:
all_markers_deseq  %>% arrange(p_val_adj) 

In [None]:
# recursive / showWarnings: also create missing parent directories and stay
# quiet when the directory already exists.
dir.create("../tables/DESeq_markers/", showWarnings = FALSE, recursive = TRUE)

In [None]:
write.csv(all_markers_deseq  %>% arrange(p_val_adj), "../tables/DESeq_markers/cd8_deseq.csv")

## DE genes with correction for sex-related genes

In [None]:
# Marker table between the `Sex` groups of one dataset.
#
# Arguments:
#   i  Index into the global `dataset_list`.
# Returns the FindAllMarkers(only.pos = TRUE) results for the "RNA" and
# "CollecTRI" assays stacked in one data frame with the extra columns
# `source`, `test_type` ("Sex") and `dataset`, or NULL when neither assay
# yields a marker (bind_rows() skips NULL entries).
Create_Sex_Markers_DataFrame <- function(i) {

  ds <- dataset_list[[i]]

  # Markers between the Sex groups for one assay. The result is labelled only
  # when it has rows: assigning a column to an empty data frame raises an
  # error.
  find_assay_markers <- function(assay, source_label) {
    DefaultAssay(ds) <- assay
    Idents(ds) <- ds$Sex
    res <- FindAllMarkers(ds, only.pos = TRUE)
    if (is.data.frame(res) && nrow(res) > 0) {
      res$source <- source_label
      rownames(res) <- NULL
    }
    res
  }

  ## scRNAseq - RNA
  markers_sc <- find_assay_markers("RNA", "Sex_RNA")
  print(paste("DE RNA: ", nrow(markers_sc)))

  ## scRNAseq - collecTRI
  markers_sc2 <- find_assay_markers("CollecTRI", "Sex_collecTRI")
  print(paste("DE CollecTRI: ", nrow(markers_sc2)))

  non_empty <- Filter(function(x) is.data.frame(x) && nrow(x) > 0,
                      list(markers_sc, markers_sc2))
  if (length(non_empty) == 0) {
    return(NULL)
  }
  markers2 <- do.call(rbind, non_empty)

  markers2$test_type <- "Sex"
  print("..")
  markers2$dataset <- names(dataset_list)[i]
  markers2
}


In [None]:

    
# Sex markers for every dataset, stacked into one table.
# seq_along() yields no iterations for an empty dataset_list, whereas
# 1:length() would yield c(1, 0).
mrk <- map(.x = seq_along(dataset_list), Create_Sex_Markers_DataFrame)

mrk <- bind_rows(mrk)


In [None]:
mrk

In [None]:
dir.create("../tables/de_genes")

write.csv(all_markers, "../tables/de_genes/240319_markers_full_cd8_with_collecTRI.csv", row.names = F)

In [None]:
write.csv(mrk, "../tables/de_genes/240319_markers_full_cd8_sex.csv", row.names = F)

In [None]:
# Create filtered marker list without Sex genes.
# For each dataset, drop every row of all_markers whose gene appears among
# the sex markers (`mrk`) of that same dataset, then stack the per-dataset
# tables in dataset_list order.
markers_without_sex_list <- lapply(names(dataset_list), function(dataset_name) {
  dataset_markers <- all_markers %>% dplyr::filter(dataset == dataset_name)
  genes_to_remove <- mrk %>%
    dplyr::filter(dataset == dataset_name) %>%
    pull(gene)
  dplyr::filter(dataset_markers, !(gene %in% genes_to_remove))
})

all_markers_without_sex <- bind_rows(markers_without_sex_list)

In [None]:
nrow(all_markers)

In [None]:
nrow(all_markers_without_sex)

In [None]:
write.csv(all_markers_without_sex, "../tables/de_genes/240319_cd8_all_markers_without_sex.csv", row.names = F)

In [None]:
all_markers_without_sex$dataset %>% table

## DE genes in SNP variant

In [None]:
snp_meta  <- read_csv("../data/snp_meta_our_paitents.csv")

In [None]:
snp_meta$`...1`  <- NULL

In [None]:
snp_meta

In [None]:
snp_meta$PTPN22_rs2476601 %>% table
snp_meta$IFIH1_rs1990760 %>% table
snp_meta$CD226_rs763361 %>% table
snp_meta$CD69_rs4763879 %>% table
snp_meta$TYK2_rs2304256 %>% table
snp_meta$UBASH3A_rs876498 %>% table

In [None]:
snp_meta  <- snp_meta  %>% dplyr::select(Patient_ID, PTPN22_rs2476601, IFIH1_rs1990760,
                                         CD226_rs763361, CD69_rs4763879,
                                        TYK2_rs2304256, UBASH3A_rs876498)

In [None]:
snp_meta$PTPN22_rs2476601   <- ifelse(snp_meta$PTPN22_rs2476601 %in% c("AA","AG","GG"), snp_meta$PTPN22_rs2476601, NA_character_)

In [None]:
snp_meta$TYK2_rs2304256 %>% table

In [None]:
snp_meta$TYK2_rs2304256   <- ifelse(snp_meta$TYK2_rs2304256 %in% c("AA","AC","CC"), snp_meta$TYK2_rs2304256, NA_character_)

In [None]:
snp_meta$CD226_rs763361  <- ifelse(snp_meta$CD226_rs763361 %in% c("CC","CT","TT"), snp_meta$CD226_rs763361, NA_character_)

In [None]:
snp_meta$CD69_rs4763879  <- ifelse(snp_meta$CD69_rs4763879 %in% c("AA","AG","GG"), snp_meta$CD69_rs4763879, NA_character_)

In [None]:
snp_meta$UBASH3A_rs876498 %>% table

In [None]:
snp_meta$UBASH3A_rs876498  <- ifelse(snp_meta$UBASH3A_rs876498 %in% c("AA","AG","GG"), snp_meta$UBASH3A_rs876498, NA_character_)

In [None]:
snp_meta$PTPN22_rs2476601 %>% table
snp_meta$IFIH1_rs1990760 %>% table
snp_meta$CD226_rs763361 %>% table
snp_meta$CD69_rs4763879 %>% table
snp_meta$TYK2_rs2304256 %>% table
snp_meta$UBASH3A_rs876498 %>% table

In [None]:
snp_meta

In [None]:
# Marker table between the genotype groups of every SNP column in the global
# `snp_meta`, for one dataset.
#
# Arguments:
#   i  Index into the global `dataset_list`.
# For each SNP column (every column of snp_meta after the first), the
# genotype is joined onto the cells by Patient_ID as `variant`, used as the
# identity, and FindAllMarkers(only.pos = FALSE) is run on the "RNA" and
# "CollecTRI" assays.
# Returns all non-empty results stacked in one data frame with the extra
# columns `source` ("<SNP column> RNA" / "<SNP column> CollecTRI"),
# `test_type` ("SNP") and `dataset`, or NULL when nothing was found
# (bind_rows() skips NULL entries).
Create_SNP_Markers_DataFrame <- function(i) {

  ds <- dataset_list[[i]]
  snp_tables <- list()

  # Markers between the genotype groups for one assay. The result is labelled
  # only when it has rows: assigning a column to an empty data frame raises an
  # error.
  find_assay_markers <- function(obj, assay, source_label) {
    DefaultAssay(obj) <- assay
    Idents(obj) <- obj$variant
    res <- FindAllMarkers(obj, only.pos = FALSE)
    if (is.data.frame(res) && nrow(res) > 0) {
      res$source <- source_label
      rownames(res) <- NULL
    }
    res
  }

  for (j in seq_len(ncol(snp_meta))[-1]) {

    # Patient_ID plus the j-th SNP column, renamed to `variant`.
    one_snp_meta <- snp_meta[, c(1, j)]
    one_snp_meta$Patient_ID <- as.character(one_snp_meta$Patient_ID)
    colnames(one_snp_meta)[2] <- "variant"

    md <- ds@meta.data
    md$variant <- NULL
    md <- left_join(md, one_snp_meta, by = "Patient_ID")
    # The join must keep exactly one row per cell; more rows would mean a
    # patient appears several times in snp_meta.
    stopifnot(nrow(md) == ncol(ds))
    # The join drops row names; restore the cell barcodes.
    rownames(md) <- colnames(ds)
    ds@meta.data <- md
    # NOTE(review): cells whose patient has no genotype get an NA identity;
    # how FindAllMarkers treats them is not visible here - confirm.

    ## scRNAseq - RNA
    markers_sc <- find_assay_markers(ds, "RNA",
                                     paste(colnames(snp_meta)[j], "RNA"))
    print(paste("DE RNA: ", nrow(markers_sc)))

    ## scRNAseq - collecTRI
    markers_sc2 <- find_assay_markers(ds, "CollecTRI",
                                      paste(colnames(snp_meta)[j], "CollecTRI"))
    print(paste("DE CollecTRI: ", nrow(markers_sc2)))

    snp_tables <- c(snp_tables,
                    Filter(function(x) is.data.frame(x) && nrow(x) > 0,
                           list(markers_sc, markers_sc2)))

    # Remove the genotype column again before the next SNP is joined.
    ds$variant <- NULL
  }

  if (length(snp_tables) == 0) {
    return(NULL)
  }
  markers3 <- do.call(rbind, snp_tables)

  markers3$test_type <- "SNP"
  print("..")
  markers3$dataset <- names(dataset_list)[i]
  markers3
}


In [None]:
dataset_list  <- cd8_l3_list

In [None]:

    
# SNP markers for every dataset, stacked into one table.
# seq_along() yields no iterations for an empty dataset_list, whereas
# 1:length() would yield c(1, 0).
mrk <- map(.x = seq_along(dataset_list), Create_SNP_Markers_DataFrame)

mrk <- bind_rows(mrk)


In [None]:
mrk

In [None]:
write.csv(mrk, "../tables/de_genes/240315_snp_cd8.csv")

In [None]:
mrk  %>% dplyr::filter(gene %in% c("PTPN22","IFIH1","CD69","CD226","UBASH3A","TYK2"))

In [None]:
mrk  %>% group_by(cluster, source)  %>% tally  %>% arrange(source)

In [None]:
mrk  %>% filter(grepl(source, pattern = "RNA")) %>% group_by(gene)  %>% tally  %>% arrange(desc(n)) 

## Analysis of DE genes

In [None]:
library(EnsDb.Hsapiens.v86)

In [None]:
geneIDs1 <- ensembldb::select(EnsDb.Hsapiens.v86, keys= all_markers_without_sex$gene, 
                              keytype = "SYMBOL", columns = c("SYMBOL","ENTREZID"))

In [None]:
geneIDs1

In [None]:
colnames(geneIDs1)  <- c("gene", "entrezid")

In [None]:
all_markers_without_sex

## Ctrl vs Dia T0

### RNA

In [None]:
# RNA markers of the diabetes-vs-control comparison at T0.
# The marker functions build test_type as
# paste(Conditions[[j]][1], "vs", Conditions[[j]][2]), and the matching
# Conditions entry is c("Dia T0", "Ctrl T0"), so the label to filter on is
# "Dia T0 vs Ctrl T0". The previous filter, "Ctrl T0 vs Dia T0", matched no row.
genes_Dia_vs_Ctrl_T0  <- all_markers_without_sex  %>% 
dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & source == "scRNAseq_RNA")  %>% 
group_by(gene)  %>% tally()  %>% arrange(desc(n))  %>% pull(gene)

In [None]:
genes_Dia_vs_Ctrl_T0

In [None]:
genes_Dia_vs_Ctrl_T0  %>% length

In [None]:
# Gene order for the x-axis: genes sorted by the cluster they mark.
lvl  <- all_markers_without_sex  %>% dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & source == "scRNAseq_RNA")  %>% 
group_by(gene, cluster)  %>% arrange(cluster)  %>% pull(gene)  %>% unique

In [None]:
all_markers_without_sex  %>% dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & source == "scRNAseq_RNA")  %>% 
group_by(gene)  %>% tally  %>% arrange(desc(n))

In [None]:
# Genes that are reported in more than one row of this comparison.
is.more.than.one.cluster  <- all_markers_without_sex  %>% 
dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & source == "scRNAseq_RNA")  %>% 
group_by(gene)  %>% tally  %>% arrange(desc(n))  %>% dplyr::filter(n>1)  %>% pull(gene)

In [None]:
is.more.than.one.cluster

In [None]:
options(repr.plot.height = 4, repr.plot.width = 20)
# Dot plot: gene (x) by dataset (y), size = -log10 adjusted p-value,
# colour = cluster and source.
all_markers_without_sex  %>% 
dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & 
              gene %in% genes_Dia_vs_Ctrl_T0 & 
              source == "scRNAseq_RNA" )  %>%  
mutate(cluster_source = paste(cluster, source))  %>% 
ggplot(aes(factor(gene, levels = lvl), dataset)) + 
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) + 
  theme_bw()  + scale_size_continuous(range=c(1,4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") + 
 theme(axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.height = 4, repr.plot.width = 20)
# Same dot plot, restricted to genes listed in is.more.than.one.cluster.
all_markers_without_sex  %>% 
dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & 
              gene %in% genes_Dia_vs_Ctrl_T0 & 
              source == "scRNAseq_RNA" & gene %in% is.more.than.one.cluster)  %>%  
mutate(cluster_source = paste(cluster, source))  %>% 
ggplot(aes(factor(gene, levels = lvl), dataset)) + 
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) + 
  theme_bw()  + scale_size_continuous(range=c(1,4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") + 
 theme(axis.text.x = element_text(angle = 90))

### CollecTRI

In [None]:
# CollecTRI markers of the diabetes-vs-control comparison at T0.
# The marker functions build test_type as
# paste(Conditions[[j]][1], "vs", Conditions[[j]][2]), and the matching
# Conditions entry is c("Dia T0", "Ctrl T0"), so the label to filter on is
# "Dia T0 vs Ctrl T0". The previous filter, "Ctrl T0 vs Dia T0", matched no row.
genes_Dia_vs_Ctrl_T0  <- all_markers_without_sex  %>% dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & source == "scRNAseq_collecTRI")  %>% 
group_by(gene)  %>% tally()  %>% arrange(desc(n))  %>% pull(gene)

In [None]:
genes_Dia_vs_Ctrl_T0

In [None]:
# Gene order for the x-axis: genes sorted by the cluster they mark.
lvl  <- all_markers_without_sex  %>% dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & source == "scRNAseq_collecTRI")  %>% 
group_by(gene, cluster)  %>% arrange(cluster)  %>% pull(gene)  %>% unique

In [None]:
all_markers_without_sex  %>% dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & source == "scRNAseq_collecTRI")  %>% 
group_by(gene)  %>% tally  %>% arrange(desc(n))

In [None]:
# Genes that are reported in more than one row of this comparison.
is.more.than.one.cluster  <- all_markers_without_sex  %>% 
dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & source == "scRNAseq_collecTRI")  %>% 
group_by(gene)  %>% tally  %>% arrange(desc(n))  %>% dplyr::filter(n>1)  %>% pull(gene)

In [None]:
is.more.than.one.cluster

In [None]:
options(repr.plot.height = 4, repr.plot.width = 20)
# Dot plot: gene (x) by dataset (y), size = -log10 adjusted p-value,
# colour = cluster and source.
all_markers_without_sex  %>% 
dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & 
              gene %in% genes_Dia_vs_Ctrl_T0 & 
              source == "scRNAseq_collecTRI" )  %>%  
mutate(cluster_source = paste(cluster, source))  %>% 
ggplot(aes(factor(gene, levels = lvl), dataset)) + 
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) + 
  theme_bw()  + scale_size_continuous(range=c(1,4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") + 
 theme(axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.height = 4, repr.plot.width = 18)
# Same dot plot, restricted to genes listed in is.more.than.one.cluster.
all_markers_without_sex  %>% 
dplyr::filter(test_type == "Dia T0 vs Ctrl T0" & 
              gene %in% genes_Dia_vs_Ctrl_T0 & 
              source == "scRNAseq_collecTRI" & gene %in% is.more.than.one.cluster)  %>%  
mutate(cluster_source = paste(cluster, source))  %>% 
ggplot(aes(factor(gene, levels = lvl), dataset)) + 
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) + 
  theme_bw()  + scale_size_continuous(range=c(1,4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") + 
 theme(axis.text.x = element_text(angle = 90))

# DE gd vs CD8 Naive


In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cd8_l1_full_filt, group.by = "annotations_l2")

In [None]:
Idents(cd8_l1_full_filt)  <- cd8_l1_full_filt$annotations_l2

In [None]:
table(cd8_l1_full_filt$annotations_l2)

In [None]:
mrk  <- FindMarkers(cd8_l1_full_filt,
                   `ident.1` = "CD8 Unconventional T cells---gd T cells",
                   `ident.2` = "CD8 T cells---Naive")

In [None]:
DefaultAssay(cd8_l1_full_filt)  <- "RNA"

In [None]:
fc  <- FoldChange(cd8_l1_full_filt,
                   `ident.1` = "CD8 Unconventional T cells---gd T cells",
                   `ident.2` = "CD8 T cells---Naive")

In [None]:
# Move the gene symbols out of the row names *before* sorting, so the
# gene <-> fold-change pairing never depends on whether arrange() keeps row names.
fc <- fc %>%
  rownames_to_column("gene") %>%
  arrange(avg_log2FC)

In [None]:
# Show the fold-change rows for the genes listed in `treg_gene_sig`.
# NOTE: `treg_gene_sig` is assigned in the cell that follows this one, so on a
# fresh top-to-bottom run this line fails unless that cell has been executed
# first (or the vector was already defined earlier in the session).
fc  %>% dplyr::filter(gene %in% treg_gene_sig)

In [None]:
treg_gene_sig  <- c('S100A4', 'FOXP3', 'ITGB1', 'AHNAK', 'TIGIT', 'ANXA2', 'IL10RA', 
                    'TNFRSF1B', 'GBP5', 'LGALS1', 'RTKN2', 'CTLA4', 'S100A10', 'IL32', 
                    'FCRL3', 'IL2RA', 'CLIC1', 'KLF6', 'ANXA5', 'SYNE2', 'S100A11', 
                    'CD74', 'CRIP1', 'EZR', 'NIBAN1', 'FLNA', 'NCF4', 'PRDM1', 'LGALS3', 
                    'CAPN2', 'ARID5B', 'SH3BGRL3', 'CST7', 'ISG20', 'MYO1F', 'LMNA', 
                    'HLA-DRB5', 'TENT5C', 'GAPDH', 'MTHFD2', 'FANK1', 'HLA-DQA1', 
                    'IL2RB', 'IKZF2', 'SRGN', 'STAM', 'CLDND1', 'DUSP4', 'BIRC3', 
                    'SAT1', 'PBXIP1', 'HLA-DRB1', 'HLA-DPA1', 'TNFRSF4', 'SHMT2', 
                    'TAGLN2', 'PLP2', 'ACTN4', 'DUSP1', 'RORA', 'CD99', 'GLCCI1', 
                    'CARD16', 'PTTG1', 'TSPAN5', 'TAP1', 'OPTN', 'EIF3A', 'ELOVL5', 
                    'LSR', 'GSTK1', 'ZFP36', 'TIFA', 'BATF', 'EMP3', 'TSC22D3', 'OGDH', 
                    'HLA-DPB1', 'CCDC50', 'LIMS1', 'RAB11FIP1', 'TRAC', 'CD84', 'CAST', 
                    'PYHIN1', 'JPT1', 'MPST', 'SAMSN1', 'ZC2HC1A', 'PMAIP1', 'S100A6', 
                    'PI16', 'OAS1', 'PPP1R18', 'NCR3', 'CD58', 'DOK2', 'BCL2L11', 
                    'SMAD3', 'GBP2', 'SYT11', 'PPP2R5C', 'PPP1R15A', 'RGS1', 'RAB37', 
                    'REEP5', 'IKZF3', 'RNF214', 'IRF1', 'ANTKMT', 'PRDX1', 'IQGAP2', 'MT2A', 
                    'TNFRSF18', 'FAS', 'DUSP2', 'CCDC167', 'HLA-DRA', 'PELI1', 'JUNB', 'LGALS9', 
                    'ZBTB38', 'SPTAN1', 'SMS', 'PARP1', 'MCL1', 'DYNLL1', 'HERC5', 'CDC25B', 
                    'SLC9A3R1', 'MYO1G', 'TPR', 'JUN', 'ID3', 'TPI1', 'RILPL2', 'CCR6', 
                    'YWHAH', 'PTGER2', 'HNRNPLL', 'PREX1', 'PSMB9', 'MYH9', 'CORO1B', 'SLAMF1', 
                    'SIT1', 'NPDC1', 'PHACTR2', 'ST8SIA6', 'ATP2B4', 'IL18R1', 'TRIM22', 'HLA-DQB1', 
                    'F5', 'TBC1D4', 'MAF', 'ATP2B1', 'C4orf48', 'GALM', 'C12orf75', 'CPA5', 
                    'PPP1CA', 'S1PR4', 'PDE4DIP', 'GATA3', 'GLIPR2', 'CHST7', 'CXCR4', 'H1-4', 
                    'GADD45B', 'RESF1', 'IER2', 'ISG15', 'CDC42EP3', 'PCBD1', 'LYST', 'TPM4', 
                    'TAB2', 'NINJ2', 'ALOX5AP', 'CCR4', 'FCER1G', 'MAP3K1', 'CXCR3', 'CD59', 
                    'SLFN5', 'CCNG2', 'ITGA4', 'SESN1', 'SPATS2L', 'HPGD', 'EFHD2', 'LIMA1', 
                    'BCL2', 'RABGAP1L', 'TOX', 'SAMD9', 'TXN', 'IFI16', 'IDS', 'TRIB2', 'CDHR3', 
                    'PALM2AKAP2', 'ICA1', 'LPAR6', 'KLRB1', 'SMC6', 'ITM2C', 'CEACAM4', 'PRF1', 
                    'CD63', 'AHR', 'IQGAP1', 'GADD45A', 'ADAM8', 'GLIPR1', 'VAV3', 'EPSTI1', 
                    'GPRIN3', 'POU2F2', 'SH2D2A', 'DENND10', 'MAP4', 'CCR10', 'PTPN18', 'IER5', 
                    'GPR183', 'LAIR2', 'CTSC', 'TTN', 'NR4A2', 'SESN3', 'CPNE2', 'MX1')


In [None]:
fc.df  <- fc  %>%   dplyr::select(gene, avg_log2FC)
ranks<- deframe(fc.df)

In [None]:
library(fgsea)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)

plotEnrichment(treg_gene_sig,
               ranks) + labs(title="treg_gene_sig in Tgd") 


In [None]:
fg  <- fgsea(pathways = list("treg_gene_sig" = treg_gene_sig),
               ranks)

In [None]:
fg$leadingEdge

In [None]:
# Score every cell for the signature as ONE module. AddModuleScore() expects
# `features` to be a list of gene vectors (one list entry per module); a bare
# character vector is treated as one single-gene module per element, which would
# make `treg_gene_sig1` the score of the first gene only.
cd8_l1_full_filt <- AddModuleScore(
  cd8_l1_full_filt,
  features = list(treg_gene_sig),
  search = FALSE,
  ctrl = 50,
  nbin = 50,
  assay = "RNA",
  name = "treg_gene_sig"
)

In [None]:
cd8_l1_full_filt$treg_gene_sig1

In [None]:
options(repr.plot.width = 10, repr.plot.height = 8)

VlnPlot(cd8_l1_full_filt, features = "treg_gene_sig1", pt.size = 0)

In [None]:
library(GEOquery)

In [None]:
gset <- getGEO("GSE106082", GSEMatrix =TRUE, getGPL=TRUE, AnnotGPL=TRUE)

In [None]:
library(readr)

In [None]:
metadata <- data.frame(geo_id = gset$GSE106082_series_matrix.txt.gz$geo_accession,
                       cell_type = gset$GSE106082_series_matrix.txt.gz$`characteristics_ch1.1`
                       )

In [None]:
mtx_tr356  <- read_csv("../../240218_VN_Diabetes_V05/data/published_data/Terrazzano_2020/tr356_df_sum.csv")

In [None]:
mtx_tr356$`...1`  <- NULL

In [None]:
mtx_tr356  <- mtx_tr356  %>% column_to_rownames("SYMBOL")

In [None]:
mtx_tr356

In [None]:
plan("multisession")

In [None]:
pred <- SingleR(test = cd8_l1_full_filt@assays$RNA@counts, 
                ref=mtx_tr356, labels=metadata$cell_type, de.method="wilcox",fine.tune = F,num.threads = 4
       )

In [None]:
cd8_diet  <- subset(cd8_l1_full_filt, barcode %in% c(sample(colnames(cd8_l1_full_filt), size = 5000)))

In [None]:
cd8_diet

In [None]:
mtx_tr356

In [None]:
# Coerce every column of the reference table to numeric
# (across() replaces the superseded mutate_all()).
mtx_tr356 <- mtx_tr356 %>%
  mutate(across(everything(), as.numeric))

In [None]:
mtx_tr356

In [None]:
ref_tr3_56 <- list(matrix = mtx_tr356[,c(1:6,10:12)], 
                       labels = metadata$cell_type[c(1:6,10:12)])


In [None]:
mtx_tr356

In [None]:
ref_tr3_56$labels

In [None]:
ref_tr3_56$matrix  <- as.matrix(ref_tr3_56$matrix)

In [None]:
head(rownames(ref_tr3_56$matrix))

In [None]:
# Label every cell of the full CD8 object against the reference in `ref_tr3_56`
# with SingleR. The raw RNA counts are passed as the test matrix and
# fine-tuning is switched on (spelled TRUE, since `T` can be reassigned).
pred <- SingleR(
  test = cd8_l1_full_filt@assays$RNA@counts,
  ref = ref_tr3_56$matrix,
  labels = ref_tr3_56$labels,
  fine.tune = TRUE,
  num.threads = 4
)

In [None]:
pred


In [None]:
cd8_l1_full_filt$singler  <- pred$labels

In [None]:
DimPlot(cd8_l1_full_filt, group.by = "annotations_l2")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4.5)
DimPlot(cd8_l1_full_filt, group.by = "singler", shuffle = T, cols = c("grey88","skyblue1","red"))

In [None]:
# The original line had no column name after `$` and did not parse. The
# predicted labels are stored under `singler`, the column the plots below read.
cd8_l1_full_filt$singler <- pred$labels

In [None]:
# Derive short population names from `annotations_l2`:
#   Annotation - the label with the "---" separator and the lineage prefixes removed
#   annot2     - Annotation with Tem / Tcm / Temra / Proliferating pooled
#                into "NonNaive"
cd8_l1_full_filt@meta.data <- cd8_l1_full_filt@meta.data %>%
  mutate(
    Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2),
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  )

In [None]:
test  <- data.frame(annotation = cd8_l1_full_filt$annot2,
                    pred = cd8_l1_full_filt$singler)

In [None]:
# Cross-tabulate annotation x predicted label; `freq` is the share of each
# predicted label within its annotation group.
df2 <- test %>%
  group_by(annotation, pred) %>%
  tally() %>%
  mutate(freq = n / sum(n))

In [None]:
options(repr.plot.width = 8, repr.plot.height = 4.5)
ggplot(test) +
  aes(x = annotation, fill = factor(pred)) +
  geom_bar(position = "fill") + 
theme_classic() + 
ggtheme() +
coord_flip() + 
scale_fill_manual(values = c("#d6ebd2ff", "lightskyblue1", "red2"))
#scale_fill_manual(values = c("grey","#74bc68ff", "dodgerblue3", "red2"))

In [None]:
options(repr.plot.width = 8, repr.plot.height = 4.5)
ggplot(test) +
  aes(x = annotation, fill = factor(pred)) +
  geom_bar(position = "fill") + 
theme_classic() + 
ggtheme() +
scale_fill_manual(values = c("white", "white", "red2"))
#scale_fill_manual(values = c("grey","#74bc68ff", "dodgerblue3", "red2"))

In [None]:
options(repr.plot.width = 7, repr.plot.height = 30)

ggplot(test) +
  aes(x = annotation, fill = factor(pred)) +
  geom_bar(position = "fill") + 
theme_classic() + 
ggtheme() +
scale_fill_manual(values = c("white", "white", "red2")) + 
scale_y_continuous(n.breaks = 40, expand = c(0,NA)) +
 theme(legend.title = element_blank(),axis.text.x = element_text(angle = 90)) 

ggsave("../figures/cd8_temra_pct.svg", width = 15, height = 45, units = "cm")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4.5)
DimPlot(cd8_l1_full_filt, group.by = "annot2", cols = c("#c77cffff","#da56b3ff","#92c0dfff", "#2078cdff","#74bc68ff"))

In [None]:

DimPlot(cd8_l1_full_filt, group.by = "annot2", cols = c("#6599caff","#f66ecdff",
                                                        "#6ee8bfff", "#9ad5ffff","#a4d034ff"))

In [None]:

DimPlot(cd8_l1_full_filt, group.by = "annot2", cols = c("#6294c3ff","#c861c4ff",
                                                        "#8cd8bfff", "#9ad5ffff","#b2d164ff"))

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4)

DimPlot(cd8_l1_full_filt, group.by = "annot2", cols = c("#c861c4ff","#c861c4ff",
                                                        "grey78", "grey78","grey78"))

In [None]:
# Store the third column of the SingleR score matrix as the per-cell TR3-56 score.
# NOTE(review): the column is picked by position; presumably column 3 is the
# TR3-56 label — confirm with colnames(pred$scores) before relying on it.
cd8_l1_full_filt$score_tr356  <- pred$scores[,3]

In [None]:
VlnPlot(cd8_l1_full_filt, features = "score_tr356", group.by = "annotations_l2")

In [None]:
options(repr.plot.width = 14, repr.plot.height = 6)
data.frame(Score = cd8_l1_full_filt$score_tr356,
                  Annotation = gsub(cd8_l1_full_filt$annotations_l2, pattern = "---", replacement = "\n"))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
ggplot(aes(x = fct_reorder(Annotation, Score), y = Score)) +
  geom_violin() + 
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score") + theme_classic() + ggtheme() + xlab("")

In [None]:
data.frame(Score = cd8_l1_full_filt$score_tr356,
                  Annotation = gsub(cd8_l1_full_filt$annotations_l2, pattern = "---", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
ggplot(aes(x = fct_reorder(annot2, Score), y = Score)) +
  geom_violin() + 
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score") + theme_classic() + ggtheme() + xlab("")

### Score per patient

In [None]:
options(repr.plot.width = 6, repr.plot.height = 8)

data.frame(Score = cd8_l1_full_filt$score_tr356,
                  Annotation = gsub(cd8_l1_full_filt$annotations_l2, pattern = "---", replacement = ""),
          Patient_ID = cd8_l1_full_filt$Patient_ID,
          Patient_Time = cd8_l1_full_filt$Patient_Time,
          Disease = cd8_l1_full_filt$Disease,
           Condition = cd8_l1_full_filt$Condition
          )  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
ggplot(aes(x = Condition, y = Score)) +
  geom_violin() + 
stat_compare_means(comparisons = list(c(1,2),c(2,3),c(1,3))) +
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score") + theme_classic() + ggtheme() + xlab("")

In [None]:

data.frame(Score = cd8_l1_full_filt$score_tr356,
                  Annotation = gsub(cd8_l1_full_filt$annotations_l2, pattern = "---", replacement = ""),
          Patient_ID = cd8_l1_full_filt$Patient_ID,
          Patient_Time = cd8_l1_full_filt$Patient_Time,
          Disease = cd8_l1_full_filt$Disease, 
           Condition = cd8_l1_full_filt$Condition
          )  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
group_by(Condition, Patient_Time)  %>% 
summarize(Score = mean(Score))  %>% 
ggplot(aes(x = Condition, y = Score)) +
  geom_violin() + 
geom_point()+
stat_compare_means(comparisons = list(c(1,2),c(2,3))) +
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score All CD8 cells") + theme_classic() + ggtheme() + xlab("")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

# Mean TR3-56 score per Condition x Patient_Time group (patient 116 excluded),
# shown per Condition with the median as a crossbar. Only the columns that
# survive the summarise step are assembled.
# NOTE(review): paired = TRUE presumably pairs observations by their order
# within each Condition — confirm that the Patient_Time ordering lines the
# same patients up in both compared groups.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Patient_ID = cd8_l1_full_filt$Patient_ID,
  Patient_Time = cd8_l1_full_filt$Patient_Time,
  Condition = cd8_l1_full_filt$Condition
) %>%
  dplyr::filter(Patient_ID != 116) %>%
  group_by(Condition, Patient_Time) %>%
  summarize(Score = mean(Score), .groups = "drop") %>%
  ggplot(aes(x = Condition, y = Score)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(fun = "median", geom = "crossbar", width = 0.75, color = "grey30") +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  geom_beeswarm(size = 3, cex = 3, color = "black", method = "center",
                aes(fill = Condition), shape = 21) +
  stat_compare_means(comparisons = list(c(2, 3)), paired = TRUE) +
  ggtitle("TR3-56 score All CD8 cells") + theme_classic() + ggtheme() + xlab("")

In [None]:
ggsave("../figures/subset_characterization/tr3_56_score_in_condition.svg",
       width = 10, height = 9.5, units = "cm")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

# Mean TR3-56 score per Condition x Patient_Time group (patient 116 excluded),
# shown per Condition with the median as a crossbar. Only the columns that
# survive the summarise step are assembled.
# NOTE(review): paired = TRUE presumably pairs observations by their order
# within each Condition — confirm that the Patient_Time ordering lines the
# same patients up in both compared groups.
data.frame(
  Score = cd8_l1_full_filt$score_tr356,
  Patient_ID = cd8_l1_full_filt$Patient_ID,
  Patient_Time = cd8_l1_full_filt$Patient_Time,
  Condition = cd8_l1_full_filt$Condition
) %>%
  dplyr::filter(Patient_ID != 116) %>%
  group_by(Condition, Patient_Time) %>%
  summarize(Score = mean(Score), .groups = "drop") %>%
  ggplot(aes(x = Condition, y = Score)) +
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(fun = "median", geom = "crossbar", width = 0.75, color = "grey30") +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  geom_beeswarm(size = 3, cex = 3, color = "black", method = "center",
                aes(fill = Condition), shape = 21) +
  stat_compare_means(comparisons = list(c(2, 3)), paired = TRUE) +
  ggtitle("TR3-56 score All CD8 cells") + theme_classic() + ggtheme() + xlab("")

In [None]:
data.frame(Score = cd8_l1_full_filt$score_tr356,
                  Annotation = gsub(cd8_l1_full_filt$annotations_l2, pattern = "---", replacement = ""),
          Patient_ID = cd8_l1_full_filt$Patient_ID,
          Patient_Time = cd8_l1_full_filt$Patient_Time,
          Disease = cd8_l1_full_filt$Disease, 
           Condition = cd8_l1_full_filt$Condition
          )  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
group_by(Condition, Patient_Time)  %>% 
summarize(Score = mean(Score))  %>% 
ggplot(aes(x = Condition, y = Score)) +
  geom_violin() + 
geom_point()+
stat_compare_means(comparisons = list(c(1,2),c(2,3))) +
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score All CD8 cells") + theme_classic() + ggtheme() + xlab("")

In [None]:
data.frame(Score = cd8_l1_full_filt$score_tr356,
                  Annotation = gsub(cd8_l1_full_filt$annotations_l2, pattern = "---", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
ggplot(aes(x = fct_reorder(annot2, Score), y = Score)) +
  geom_violin() + 
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score") + theme_classic() + ggtheme() + xlab("")
ggsave("../figures/cd356_score.svg", width = 4, height = 3.5)

### Score in Unconventional

In [None]:
cd8_l2_unc_orig  <- readRDS("../../240617_VN_Diabetes_V06/data/processed/L2/cd8_l2_unc.rds")

In [None]:
cd8_l2_unc_orig$Patient_ID  %>% table

In [None]:
cd8_l2_unc  <- cd8_l2_unc_orig

In [None]:
colnames(cd8_l2_unc) %in% colnames(cd8_l1_full_filt)  %>% table

In [None]:
cd8_l2_unc$barcode  <- colnames(cd8_l2_unc)

In [None]:
md_unc  <- cd8_l2_unc@meta.data

In [None]:
md_unc

In [None]:
# Bring the per-cell TR3-56 score and SingleR label over from the full CD8 object.
# The key is named explicitly so the join cannot silently pick up any other
# column the two tables happen to share.
md_unc2 <- md_unc %>%
  left_join(
    cd8_l1_full_filt@meta.data %>% dplyr::select(barcode, score_tr356, singler),
    by = "barcode"
  )

In [None]:
rownames(md_unc2)  <- colnames(cd8_l2_unc)

In [None]:
md_unc2

In [None]:
colnames(cd8_l2_unc)

In [None]:
cd8_l2_unc@meta.data  <- md_unc2

In [None]:
cd8_l2_unc@meta.data

In [None]:
colnames(cd8_l2_unc)

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4.5)

FeaturePlot(cd8_l2_unc, features = "score_tr356", min.cutoff = 0.2, max.cutoff = 0.32)

In [None]:
options(repr.plot.width = 10, repr.plot.height = 6 )
DimPlot(cd8_l2_unc, group.by = "singler", cols = c("grey88", "blue", "red"))

In [None]:
DimPlot(cd8_l2_unc)

In [None]:
cd8_l2_unc$annotations_manual  %>% table

In [None]:
test  <- data.frame(annotation = cd8_l2_unc$seurat_clusters,
                    pred = cd8_l2_unc$singler)

df2 <- test %>% group_by(annotation, pred) %>% 
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

df2

options(repr.plot.width = 8, repr.plot.height = 4.5)
ggplot(test) +
  aes(x = annotation, fill = factor(pred)) +
  geom_bar(position = "fill") + 
theme_classic() + 
ggtheme() +
coord_flip() + 
scale_fill_manual(values = c("grey88", "lightskyblue1", "red2"))


In [None]:
options(repr.plot.width = 8, repr.plot.height = 6)

data.frame(Score = cd8_l2_unc$score_tr356,
                  Annotation = cd8_l2_unc$annotations_manual,
          Patient_ID = cd8_l2_unc$Patient_ID,
          Patient_Time = cd8_l2_unc$Patient_Time,
          Disease = cd8_l2_unc$Disease,
           Condition = cd8_l2_unc$Condition
          )  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
ggplot(aes(x = Condition, y = Score)) +
  geom_violin() + 
stat_compare_means(comparisons = list(c(1,2),c(2,3))) +
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score") + theme_classic() + ggtheme() + xlab("")

In [None]:

data.frame(Score = cd8_l2_unc$score_tr356,
                  Annotation = cd8_l2_unc$annotations_manual,
          Patient_ID = cd8_l2_unc$Patient_ID,
          Patient_Time = cd8_l2_unc$Patient_Time,
          Disease = cd8_l2_unc$Disease, 
           Condition = cd8_l2_unc$Condition
          )  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
group_by(Condition, Patient_Time)  %>% 
summarize(Score = mean(Score))  %>% 
ggplot(aes(x = Condition, y = Score)) +
  geom_violin() + 
geom_point()+
stat_compare_means(comparisons = list(c(1,2),c(2,3))) +
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score CD8 unconventional") + theme_classic() + ggtheme() + xlab("")

In [None]:
# TR3-56 score per pooled population in the unconventional CD8 object,
# populations ordered by score, mean shown as a red crossbar.
data.frame(
  Score = cd8_l2_unc$score_tr356,
  Annotation = gsub("---", "", cd8_l2_unc$annotations_l2)
) %>%
  mutate(
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  ) %>%
  ggplot(aes(x = fct_reorder(annot2, Score), y = Score)) +
  geom_violin() +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") + theme_classic() + ggtheme() + xlab("")

# Saved under its own name: the all-CD8 version of this plot is written to
# "../figures/cd356_score.svg" earlier in the file and would otherwise be
# overwritten by this one.
ggsave("../figures/cd356_score_unconventional.svg", width = 4, height = 3.5)

In [None]:
# Derive short population names from `annotations_l2`:
#   Annotation - the label with the "---" separator and the lineage prefixes removed
#   annot2     - Annotation with Tem / Tcm / Temra / Proliferating pooled
#                into "NonNaive"
cd8_l1_full_filt@meta.data <- cd8_l1_full_filt@meta.data %>%
  mutate(
    Annotation = gsub("---", "", cd8_l1_full_filt$annotations_l2),
    Annotation = gsub("CD8 T cells", "", Annotation),
    Annotation = gsub("CD8 Unconventional T cells", "", Annotation),
    Annotation = gsub("CD8 NK cells", "", Annotation),
    annot2 = case_when(
      Annotation %in% c("Tem", "Tcm", "Temra", "Proliferating") ~ "NonNaive",
      TRUE ~ Annotation
    )
  )

In [None]:
cd8_l1_full_filt$annotations_l2_sample  <- paste(cd8_l1_full_filt$annot2, cd8_l1_full_filt$Sample_ID)

In [None]:
cd8_l1_full_filt$annotations_l2_sample   %>% table

In [None]:
# Fraction of cells with a non-zero count for a set of marker genes, computed
# separately for every value of `annotations_l2_sample`. The result, `md2`,
# holds one row per sample/gene pair.

# Extract the dataframe with seurat metadata
seurat_meta_data <- cd8_l1_full_filt@meta.data
seurat_meta_data$sample <- seurat_meta_data$annotations_l2_sample

# Select genes of interest
# NOTE(review): "CD3Z" is probably absent from the count matrix under that name
# (the HGNC symbol for CD3-zeta is CD247) — confirm. Genes that are absent are
# reported as 0 below.
gene_hits <- c("NCAM1", "TRGC1", "CD8A", "CD8B",
               "FCGR3A", "CD3D", "CD3G", "CD3Z")

# By default the fraction is computed over all cells of the object.
# Optionally, restrict the object to a cluster of interest beforehand, e.g.:
# pbmc_small <- subset(pbmc_small, seurat_clusters == 3)

# Result table: one row per gene; one column per sample is added in the loop
expr_data4 <- data.frame(genes = gene_hits)

# Converts any non-zero count to 1 (works element-wise, so also on a matrix)
fns_replace <- function(x){ifelse(x>0,1,0)}

# Loop-invariant inputs: the raw count matrix and the requested genes that
# actually exist in it
counts_mat <- cd8_l1_full_filt@assays$RNA@counts
genes_present <- rownames(counts_mat)[rownames(counts_mat) %in% gene_hits]

for(j in unique(seurat_meta_data$sample)){

  # cells that belong to this sample
  cells_j <- rownames(seurat_meta_data)[seurat_meta_data$sample == j]

  # binary genes x cells matrix (1 = expressed); drop = FALSE keeps it a matrix
  # even when only one cell or one gene is selected
  expr_bin <- fns_replace(
    as.matrix(counts_mat[genes_present, cells_j, drop = FALSE])
  )

  # fraction of cells expressing each gene
  expr_data3 <- rowMeans(expr_bin)

  # genes missing from the count matrix count as not expressed
  missing_genes <- setdiff(gene_hits, names(expr_data3))
  expr_data3[missing_genes] <- 0

  # add this sample as a new column, aligned to the gene order of the table
  expr_data4[[j]] <- unname(expr_data3[gene_hits])

}

# Now we have the whole dataframe, we just need to tidy it a bit, add metadata and visualize

# Transpose to one row per sample. t() yields a character matrix because of the
# `genes` column, so `pct_express` is character from here on and is converted
# with as.numeric() where it is plotted.
expr_data5 <- as.data.frame(t(expr_data4))
colnames(expr_data5) <- expr_data5[1,]
expr_data5 <- expr_data5[-1, , drop = FALSE]
expr_data5$sample <- rownames(expr_data5)
expr_data5 <- expr_data5 %>% pivot_longer( !sample, names_to = "gene", values_to = "pct_express")

# Add metadata per sample - select those that you will use in the plot below
md_to_join <- seurat_meta_data %>% dplyr::select(sample = annotations_l2_sample)  %>% 
unique() %>% mutate(sample = as.character(sample))
md2 <- left_join(expr_data5, md_to_join, by = "sample") %>% ungroup %>% unique



In [None]:
md2

In [None]:
# Split `sample` (built as paste(<population>, <Sample_ID>)) at its last space
# instead of at a fixed character offset, so IDs of any length work and
# neither part keeps stray whitespace.
# NOTE(review): assumes Sample_ID itself contains no space — confirm.
md2 %>%
  mutate(
    Sample_ID = sub("^.* ", "", sample),
    population = sub(" [^ ]*$", "", sample)
  )

In [None]:
options(repr.plot.width = 20, repr.plot.height = 12)

# Per-sample expression value (`pct_express`) of each marker gene, by
# population, one facet per gene; "CD3Z" is left out.
md2 %>%
  dplyr::filter(gene != "CD3Z") %>%
  pivot_wider(names_from = "gene", values_from = "pct_express", values_fill = "0") %>%
  # every column except `sample` is a gene; selecting by name instead of by
  # position keeps this correct if the gene list changes
  pivot_longer(cols = !sample, names_to = "gene", values_to = "pct_express") %>%
  # split "<population> <Sample_ID>" at the last space rather than at a fixed
  # offset. NOTE(review): assumes Sample_ID contains no space — confirm.
  mutate(
    Sample_ID = sub("^.* ", "", sample),
    population = sub(" [^ ]*$", "", sample)
  ) %>%
  ggplot(aes(x = population, y = as.numeric(pct_express))) +
  geom_violin(alpha = 0.3, aes(fill = population), scale = "width") +
  geom_beeswarm(size = 1.5, aes(fill = population), cex = 2,
                shape = 21, color = "grey40", corral = "random") +
  stat_summary(fun = "median", geom = "crossbar", width = 0.75, color = "grey30") +
  facet_wrap(~gene, scales = "free", ncol = 5) +
  ylim(c(0, NA)) +
  ylab("Percentage of expressing cells") +
  theme_classic() + ggtheme() +
  theme(legend.title = element_blank(), axis.text.x = element_text(angle = 90))

ggsave("../figures/scRNAseq_gd_characterization.svg", width = 13, height = 9)

In [None]:
options(repr.plot.width = 15, repr.plot.height = 8)

# Box plot version of the per-sample expression values, all genes included,
# with a global comparison test per facet.
md2 %>%
  pivot_wider(names_from = "gene", values_from = "pct_express", values_fill = "0") %>%
  # every column except `sample` is a gene; selecting by name instead of by
  # position keeps this correct if the gene list changes
  pivot_longer(cols = !sample, names_to = "gene", values_to = "pct_express") %>%
  # split "<population> <Sample_ID>" at the last space rather than at a fixed
  # offset. NOTE(review): assumes Sample_ID contains no space — confirm.
  mutate(
    Sample_ID = sub("^.* ", "", sample),
    population = sub(" [^ ]*$", "", sample)
  ) %>%
  ggplot(aes(x = population, y = as.numeric(pct_express))) +
  geom_boxplot(outlier.shape = NA) +
  geom_dotplot(binaxis = 'y', stackdir = 'center', dotsize = 0) +
  geom_jitter(position = position_jitter(0.2), size = 2, aes(color = factor(population))) +
  theme_minimal() +
  facet_wrap(~gene, scales = "free", ncol = 5) +
  ylim(c(0, NA)) +
  ggpubr::stat_compare_means() +
  ylab("Percentage of expressing cells") +
  theme(legend.title = element_blank())

In [None]:
FeaturePlot(cd8_l1_full_filt, features = "score_tr356", min.cutoff = 0.2, cols = c("#2d95ffff","#ff4140ff","#ff4140ff"))

In [None]:
FeaturePlot(cd8_l1_full_filt, features = "score_tr356", min.cutoff = 0.2, cols = c("white","white","#ff4140ff"))

In [None]:
FeaturePlot(cd8_l1_full_filt, features = "score_tr356", min.cutoff = 0.2, cols = c("grey88","red","#ff4140ff"))

### Without CD3+CD56- 

In [None]:
library(GEOquery)

In [None]:
gset <- getGEO("GSE106082", GSEMatrix =TRUE, getGPL=TRUE, AnnotGPL=TRUE)

In [None]:
library(readr)

In [None]:
metadata <- data.frame(geo_id = gset$GSE106082_series_matrix.txt.gz$geo_accession,
                       cell_type = gset$GSE106082_series_matrix.txt.gz$`characteristics_ch1.1`
                       )

In [None]:
mtx_tr356  <- read_csv("../data/published_data/Terrazzano_2020/tr356_df_sum.csv")

In [None]:
mtx_tr356$`...1`  <- NULL

In [None]:
mtx_tr356  <- mtx_tr356  %>% column_to_rownames("SYMBOL")

In [None]:
# Coerce every column of the reference table to numeric
# (across() replaces the superseded mutate_all()).
mtx_tr356 <- mtx_tr356 %>%
  mutate(across(everything(), as.numeric))

In [None]:
mtx_tr356

In [None]:
ref_tr3_56 <- list(matrix = mtx_tr356, 
                       labels = metadata$cell_type)


In [None]:
mtx_tr356

In [None]:
ref_tr3_56$labels

In [None]:
ref_tr3_56$matrix  <- as.matrix(ref_tr3_56$matrix)

In [None]:
head(rownames(ref_tr3_56$matrix))

In [None]:
# Label every cell of the full CD8 object against the reference in `ref_tr3_56`
# with SingleR. The raw RNA counts are passed as the test matrix and
# fine-tuning is switched on (spelled TRUE, since `T` can be reassigned).
pred <- SingleR(
  test = cd8_l1_full_filt@assays$RNA@counts,
  ref = ref_tr3_56$matrix,
  labels = ref_tr3_56$labels,
  fine.tune = TRUE,
  num.threads = 4
)

In [None]:
pred


In [None]:
cd8_l1_full_filt$singler  <- pred$labels

In [None]:
DimPlot(cd8_l1_full_filt, group.by = "annotations_l2")

In [None]:
DimPlot(cd8_l1_full_filt, group.by = "singler", shuffle = T, cols = c("purple","grey88","dodgerblue","red"))

In [None]:
test  <- data.frame(annotation = cd8_l1_full_filt$annotations_l2,
                    pred = cd8_l1_full_filt$singler)

In [None]:
# Cross-tabulate annotation x predicted label; `freq` is the share of each
# predicted label within its annotation group.
df2 <- test %>%
  group_by(annotation, pred) %>%
  tally() %>%
  mutate(freq = n / sum(n))

In [None]:
df2

In [None]:
df2

In [None]:
# Store the fourth column of the SingleR score matrix as the per-cell TR3-56 score.
# NOTE(review): the column is picked by position (4 here, 3 in the earlier run
# with the reduced reference); presumably it is the TR3-56 label — confirm
# with colnames(pred$scores) before relying on it.
cd8_l1_full_filt$score_tr356  <- pred$scores[,4]

In [None]:
VlnPlot(cd8_l1_full_filt, features = "score_tr356", group.by = "annotations_l2")

In [None]:
options(repr.plot.width = 14, repr.plot.height = 6)
data.frame(Score = cd8_l1_full_filt$score_tr356,
                  Annotation = gsub(cd8_l1_full_filt$annotations_l2, pattern = "---", replacement = "\n"))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "CD8 NK cells", replacement = ""))  %>% 
ggplot(aes(x = fct_reorder(Annotation, Score), y = Score)) +
  geom_violin() + 
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score") + theme_classic() + ggtheme() + xlab("")

In [None]:
FeaturePlot(cd8_l1_full_filt, features = "NCAM1", min.cutoff = 0.2)

In [None]:
FeaturePlot(cd8_l1_full_filt, features = "NCAM1", min.cutoff = 0.2)

In [None]:
VlnPlot(cd8_l1_full_filt, features = "NCAM1", group.by = "annotations_l2")

In [None]:
# Combined "<annotation> <Sample_ID>" key (space-separated), one per cell.
cd8_l1_full_filt$annotations_l2_sample <- paste(
  cd8_l1_full_filt$annotations_l2,
  cd8_l1_full_filt$Sample_ID,
  sep = " "
)

In [None]:
cd8_l1_full_filt$annotations_l2_sample   %>% table

In [None]:
# Percentage of expressing cells per annotation/sample group.
# For every value of `annotations_l2_sample`, compute the fraction of cells
# with a non-zero raw RNA count for each gene of interest, then reshape to a
# long table `md2` with columns sample, gene and pct_express.

# Extract the dataframe with seurat metadata
seurat_meta_data <- cd8_l1_full_filt@meta.data
seurat_meta_data$sample <- seurat_meta_data$annotations_l2_sample

# Select genes of interest. A single gene is fine: the matrix subsets below
# use drop = FALSE, so they never collapse to a vector.
# NOTE(review): "CD3Z" is an alias; if the feature names use the symbol CD247,
# this entry silently comes out as 0 everywhere — confirm.
gene_hits <- c("NCAM1", "TRGC1", "CD8A", "CD8B",
               "FCGR3A", "CD3D", "CD3G", "CD3Z")

# By default the percentage of calculated cells is computed for whole sample
# Optionally, you can select a cluster that you're interested in by subsetting:
# pbmc_small <- subset(pbmc_small, seurat_clusters == 3)

# Result table: one row per gene; the loop adds one column per group
expr_data4 <- data.frame(genes = gene_hits)

# Convert any non-zero count to 1 and everything else to 0
fns_replace <- function(x) {
  ifelse(x > 0, 1, 0)
}

# Fetch the count matrix once instead of subsetting the Seurat object for
# every group, and note which requested genes are actually present.
counts_all <- cd8_l1_full_filt@assays$RNA@counts
genes_present <- intersect(gene_hits, rownames(counts_all))

for (j in unique(seurat_meta_data$sample)) {

  # cells that belong to this annotation/sample group
  cells_j <- rownames(seurat_meta_data)[seurat_meta_data$sample == j]

  # binary genes x cells matrix (1 = at least one count)
  expr_bin <- fns_replace(
    as.matrix(counts_all[genes_present, cells_j, drop = FALSE])
  )

  # fraction of expressing cells; genes absent from the assay stay at 0
  pct_j <- setNames(numeric(length(gene_hits)), gene_hits)
  pct_j[genes_present] <- rowMeans(expr_bin)

  expr_data3 <- data.frame(genes = gene_hits, pct = unname(pct_j))
  colnames(expr_data3)[2] <- j

  # append this group's column, keeping the gene order of gene_hits
  expr_data4 <- left_join(expr_data4, expr_data3, by = "genes")
}

# Now we have the whole dataframe, we just need to tidy it a bit, add metadata and visualize

# Transpose to one row per group. t() converts everything to character, so
# `pct_express` is a character column from here on; code that plots it has to
# convert it back with as.numeric().
expr_data5 <- as.data.frame(t(expr_data4))
colnames(expr_data5) <- expr_data4$genes
# the first row only held the gene names
expr_data5 <- expr_data5[-1, , drop = FALSE]
expr_data5$sample <- rownames(expr_data5)
expr_data5 <- expr_data5 %>%
  pivot_longer(!sample, names_to = "gene", values_to = "pct_express")

# Add metadata per sample - select those that you will use in the plot below
md_to_join <- seurat_meta_data %>%
  dplyr::select(sample = annotations_l2_sample) %>%
  unique() %>%
  mutate(sample = as.character(sample))
md2 <- left_join(expr_data5, md_to_join, by = "sample") %>%
  ungroup() %>%
  unique()



In [None]:
md2

In [None]:
# Split the "<annotation> <Sample_ID>" key back into its two parts at the last
# space, so the result does not depend on the length of the sample ID.
# The earlier fixed-width substr() calls took the last 4 characters as the ID
# and everything up to position n-3 as the population, so the two pieces
# overlapped by one character.
# NOTE(review): assumes Sample_ID itself contains no space — confirm.
md2 %>%
  mutate(
    Sample_ID = sub("^.* ", "", sample),
    population = sub(" [^ ]*$", "", sample)
  )

In [None]:
options(repr.plot.width = 15, repr.plot.height = 8)

# Per-gene box + jitter plots of the fraction of expressing cells, one point
# per annotation/sample group, with a global test from ggpubr in each facet.
md2 %>%
  # round-trip through wide format so missing sample/gene pairs become "0"
  # (pct_express is handled as character here, hence the "0" fill and the
  # as.numeric() in the aesthetics)
  pivot_wider(names_from = "gene", values_from = "pct_express", values_fill = "0") %>%
  # every column except `sample` is a gene, however many genes were selected
  pivot_longer(cols = !sample, names_to = "gene", values_to = "pct_express") %>%
  # split "<annotation> <Sample_ID>" at the last space instead of using
  # fixed-width substr(), which overlapped by one character.
  # NOTE(review): assumes Sample_ID contains no space — confirm.
  mutate(
    Sample_ID = sub("^.* ", "", sample),
    population = sub(" [^ ]*$", "", sample)
  ) %>%
  ggplot(aes(x = population, y = as.numeric(pct_express))) +
  geom_boxplot(outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(position = position_jitter(0.2), size = 2, aes(color = factor(population))) +
  theme_minimal() +
  facet_wrap(~gene, scales = "free", ncol = 5) +
  ylim(c(0, NA)) +
  ggpubr::stat_compare_means() +
  ylab("Percentage of expressing cells") +
  theme(legend.title = element_blank())

In [None]:
# Horizontal stacked bars: share of each SingleR label within every annotation.
ggplot(test, aes(x = annotation, fill = factor(pred))) +
  geom_bar(position = "fill") +
  theme_classic() +
  ggtheme() +
  coord_flip() +
  scale_fill_manual(values = c("#74bc68ff", "#d6ebd2ff", "#bde0f7ff", "red2"))
# scale_fill_manual(values = c("grey","#74bc68ff", "dodgerblue3", "red2"))

In [None]:
cd8_l1_full_filt

# Heatmaps of average expression of DE genes

In [None]:
# Average RNA expression per Patient_Time group, restricted to four experiments.
avgexp <- AverageExpression(
  subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")),
  return.seurat = FALSE,
  group.by = "Patient_Time",
  assay = "RNA"
)

In [None]:
avgexp$RNA[which(rownames(avgexp$RNA)=="GNLY"),]  %>% as.data.frame()

## Heatmap

### All cells

In [None]:
# Space-separated "<Patient_ID> <Time> <Disease>" key for every cell.
cd8_l1_full_filt$Patient_Time_Disease <- paste(
  cd8_l1_full_filt$Patient_ID,
  cd8_l1_full_filt$Time,
  cd8_l1_full_filt$Disease,
  sep = " "
)

In [None]:
cd8_l1_full_filt$Patient_Time_Disease   %>% table

In [None]:
# Space-separated "<Disease> <Time>" key for every cell.
cd8_l1_full_filt$Disease_time <- paste(
  cd8_l1_full_filt$Disease,
  cd8_l1_full_filt$Time,
  sep = " "
)

In [None]:
# Average RNA expression per Disease_time group, restricted to four experiments.
avgexp <- AverageExpression(
  subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")),
  return.seurat = FALSE,
  group.by = "Disease_time",
  assay = "RNA"
)

In [None]:
# Genes shown in the averaged-expression heatmap.
# "NFKBIA" was previously misspelled "NKFBIA", so the %in% filter on the row
# names silently left it out of the heatmap.
genes <- c(
  "PCBP2", "PCBP1",
  "CX3CR1", "TNF", "GZMB", "GZMA", "PRF1", "NKG7", "GNLY", "CCL5", "CST7",
  "BTG1", "SELL", "IL7R", "CCR7", "BTG2", "SLAMF6", "LEF1",
  "TNFAIP3", "TSC22D3", "NFKBIA", "DUSP1"
)

In [None]:
# Keep only the heatmap genes; rows stay in the matrix's own order.
avgexp <- avgexp$RNA[rownames(avgexp$RNA) %in% genes, ]

In [None]:
library(pheatmap)

options(repr.plot.width = 4, repr.plot.height = 9)

# Row-scaled heatmap of the averaged expression; rows are clustered, columns
# keep their given order.
pheatmap(
  avgexp,
  main = "",
  scale = "row",
  cluster_cols = FALSE,
  cluster_rows = TRUE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  width = 9,
  height = 3.3,
  fontsize = 9
)

In [None]:
# Create the output folder (including missing parent folders) without warning
# when it already exists, then export the averaged expression table.
dir.create("../tables/avg_heatmap/", recursive = TRUE, showWarnings = FALSE)
write.csv(avgexp, "../tables/avg_heatmap/cd8_avg.csv")

### Average of per-patient averages

In [None]:
# Space-separated "<Patient_ID> <Time> <Disease>" key for every cell.
cd8_l1_full_filt$Patient_Time_Disease <- paste(
  cd8_l1_full_filt$Patient_ID,
  cd8_l1_full_filt$Time,
  cd8_l1_full_filt$Disease,
  sep = " "
)

In [None]:
# Average RNA expression per Patient_Time_Disease group (four experiments
# only), returned as a Seurat object so it can be averaged again afterwards.
avgexp <- AverageExpression(
  subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")),
  return.seurat = TRUE,
  group.by = "Patient_Time_Disease",
  assay = "RNA"
)

In [None]:
# The column names of the averaged object carry the grouping key; copy them
# into the metadata and split them on spaces into Patient, Time and Disease
# (the combined column is kept).
avgexp$Patient_Time_Disease <- colnames(avgexp)
avgexp@meta.data <- separate(
  avgexp@meta.data,
  Patient_Time_Disease,
  into = c("Patient", "Time", "Disease"),
  sep = " ",
  remove = FALSE
)

In [None]:
avgexp$Disease_Time  <- paste(avgexp$Disease, avgexp$Time)

In [None]:
# Second averaging step: collapse the per-patient averages by Disease_Time.
avgexp2 <- AverageExpression(
  avgexp,
  return.seurat = FALSE,
  group.by = "Disease_Time"
)

In [None]:
# Curated gene list for the ordered heatmap; the vector order is the row order
# used when plotting. The two runs that were previously built with rev() are
# written out here in their final order.
genes4 <- c(
  "LEF1", "BACH2", "NELL2", "TCF7", "CXCR4",
  "ZFP36L2", "IL7R", "KLF2", "CCR7", "SELL",
  "IFNG", "TBX21", "TNF", "CCL5", "CX3CR1", "GZMA",
  "GNLY", "CST7", "GZMB", "NKG7", "PRF1", "TNFRSF9",
  "TNFAIP3", "DUSP1", "TSC22D3", "NFKBIA", "DDIT4", "INPP4B",
  "IFIT3", "IFI6", "STAT1", "MX1", "ISG15", "IFI44L", "OASL",
  "BTN3A2", "BTN3A3", "HLA-C", "HLA-DQA1", "HLA-DQB1"
)

In [None]:
# Restrict the group-level averages to the curated genes.
avgexp3 <- avgexp2$RNA[rownames(avgexp2$RNA) %in% genes4, ]

In [None]:
options(repr.plot.width = 4, repr.plot.height = 9)

# Row-scaled heatmap with rows in the curated genes4 order (no clustering).
# Genes that are not rows of avgexp3 are skipped: match() returned NA for them
# and indexed a row that does not exist.
pheatmap(
  avgexp3[genes4[genes4 %in% rownames(avgexp3)], , drop = FALSE],
  main = "",
  scale = "row",
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  width = 9,
  height = 3.3,
  fontsize = 9
)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 9)

# Same ordered heatmap, written to a PDF file.
# The output folder is created first (no warning when it already exists), and
# genes that are not rows of avgexp3 are skipped instead of producing NA
# indices through match().
dir.create("../figures/heatmaps", recursive = TRUE, showWarnings = FALSE)
pheatmap(
  avgexp3[genes4[genes4 %in% rownames(avgexp3)], , drop = FALSE],
  main = "",
  scale = "row",
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  width = 3.3,
  height = 9,
  fontsize = 9,
  filename = "../figures/heatmaps/fig2_cd8.pdf"
)

In [None]:
# Plot the averaged expression of one gene per condition, as two panels.
#
# Reads the global `avgexp`: the row of `avgexp$RNA` named `gene` is taken and
# its column names are split on spaces into Patient and Time. Patients whose ID
# starts with "1" are labelled "Dia", all others "Ctrl"; Condition is
# "<group> <Time>".
#
# Panels (combined with `+`; presumably patchwork is attached by the sourced
# setup script — confirm):
#   1. all patients, comparison between the 1st and 2nd condition
#   2. patient 116 excluded, paired comparison between the 2nd and 3rd condition
#
# gene: a single gene name that must be a row name of `avgexp$RNA`.
# Returns the combined plot. Stops with an explicit error if the gene is
# missing. Side effect: sets the repr.plot.width / repr.plot.height options.
plot_gene <- function(gene) {
  options(repr.plot.width = 8.5, repr.plot.height = 6)

  # fail early with a readable message instead of a column-renaming error
  gene_row <- which(rownames(avgexp$RNA) == gene)
  if (length(gene_row) == 0) {
    stop("Gene '", gene, "' not found in avgexp$RNA", call. = FALSE)
  }

  df <- as.data.frame(avgexp$RNA[gene_row, ]) %>%
    rownames_to_column("Patient_Time")
  colnames(df)[2] <- "gene2"
  df <- df %>%
    separate(Patient_Time, into = c("Patient", "Time"), sep = " ", remove = FALSE) %>%
    mutate(group = ifelse(substr(Patient, 1, 1) == "1", "Dia", "Ctrl")) %>%
    mutate(Condition = paste(group, Time))

  # Both panels share every layer except the data and the comparison layer.
  draw_panel <- function(data, test_layer) {
    ggplot(data, aes(x = Condition, y = gene2)) +
      geom_violin(alpha = 0.3, aes(fill = Condition), scale = "width") +
      stat_summary(fun = "median",
                   geom = "crossbar",
                   width = 0.75,
                   color = "grey30") +
      geom_beeswarm(size = 3, aes(fill = Condition), cex = 3,
                    shape = 21, color = "black", corral = "random") +
      test_layer +
      scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
      scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
      ylab("") +
      xlab("") +
      ylim(0, NA) +
      theme_classic() +
      theme(strip.background = element_blank(), panel.grid = element_blank()) +
      theme(axis.text = element_text(color = "black"),
            axis.text.x = element_text(angle = 90),
            axis.line = element_line(color = "black", size = 0.5),
            axis.ticks.x = element_blank()) +
      ggtitle(gene) +
      ggtheme()
  }

  p_all <- draw_panel(
    df,
    stat_compare_means(label = "p.format", comparisons = list(c(1, 2)))
  )
  p_paired <- draw_panel(
    dplyr::filter(df, Patient != 116),
    stat_compare_means(label = "p.format", comparisons = list(c(2, 3)), paired = TRUE)
  )

  p_all + p_paired
}

In [None]:
ls()

In [None]:
# Compact single-panel version of the per-condition expression plot.
#
# Reads the global `avgexp`: the row of `avgexp$RNA` named `gene` is taken and
# its column names are split on spaces into Patient and Time. Patients whose ID
# starts with "1" are labelled "Dia", all others "Ctrl"; Condition is
# "<group> <Time>". The 1st and 2nd condition are compared; x-axis labels and
# the legend are hidden.
#
# gene: a single gene name that must be a row name of `avgexp$RNA`.
# Returns the plot. Stops with an explicit error if the gene is missing.
# Side effect: sets the repr.plot.width / repr.plot.height options.
plot_gene2 <- function(gene) {
  options(repr.plot.width = 2.5, repr.plot.height = 4)

  # fail early with a readable message instead of a column-renaming error
  gene_row <- which(rownames(avgexp$RNA) == gene)
  if (length(gene_row) == 0) {
    stop("Gene '", gene, "' not found in avgexp$RNA", call. = FALSE)
  }

  df <- as.data.frame(avgexp$RNA[gene_row, ]) %>%
    rownames_to_column("Patient_Time")
  colnames(df)[2] <- "gene2"
  df <- df %>%
    separate(Patient_Time, into = c("Patient", "Time"), sep = " ", remove = FALSE) %>%
    mutate(group = ifelse(substr(Patient, 1, 1) == "1", "Dia", "Ctrl")) %>%
    mutate(Condition = paste(group, Time))

  ggplot(df, aes(x = Condition, y = gene2)) +
    geom_violin(alpha = 0.3, aes(fill = Condition), scale = "width") +
    stat_summary(fun = "median",
                 geom = "crossbar",
                 width = 0.75,
                 color = "grey30") +
    geom_beeswarm(size = 3, aes(fill = Condition), cex = 3,
                  shape = 21, color = "black", corral = "random") +
    stat_compare_means(label = "p.format", comparisons = list(c(1, 2))) +
    scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
    scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
    ylab("") +
    xlab("") +
    ylim(0, NA) +
    theme_classic() +
    theme(strip.background = element_blank(), panel.grid = element_blank()) +
    theme(axis.text = element_text(color = "black"),
          axis.text.x = element_blank(),
          axis.line = element_line(color = "black", size = 0.5),
          axis.ticks.x = element_blank()) +
    ggtitle(gene) +
    ggtheme() +
    NoLegend()
}

In [None]:
avgexp

In [None]:
plot_gene("IFI44L")

In [None]:
plot_gene("ASCL2")

In [None]:
plot_gene2("BTN3A2")

In [None]:
plot_gene("ASCL2")

In [None]:
plot_gene("DUSP1")

In [None]:
plot_gene("TSC22D3")

In [None]:
plot_gene("TRGV9")

In [None]:
# Rasterised feature plot of "PTPRC-RA" on the level-2 subcluster object,
# with values capped at 4.
FeaturePlot(
  cd8_l2_subcluster,
  features = c("PTPRC-RA"),
  max.cutoff = 4,
  raster = TRUE,
  raster.dpi = c(900, 900),
  pt.size = 4
) +
  ggtheme()


In [None]:
plot_gene("IL7R")

In [None]:
plot_gene("GADD45B")