In [None]:
source("diabetes_analysis_v07.R")

In [None]:
# Add a `score` column to a marker table.
#
# df: data frame with columns `p_val_adj`, `avg_log2FC`, `pct.1` and `pct.2`.
# Returns `df` with `score` = -log(adjusted p) * log2 fold change * pct.1/pct.2.
rank_score_func <- function(df) {
  p_floor <- 10^-310    # keeps log() finite when p_val_adj is 0
  pct_floor <- 10^-300  # keeps the ratio finite when pct.2 is 0
  df %>%
    mutate(
      score = -1 * log(p_val_adj + p_floor) * avg_log2FC * (pct.1 / (pct.2 + pct_floor))
    )
}

# Analysis of subsets

In [None]:
full  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/full.rds")

In [None]:
full$nCount_RNA  %>% summary()

In [None]:
full$nFeature_RNA  %>% summary()

In [None]:
cd4_l3_naive  <- readRDS("../data/processed/L3/cd4_l3_naive.rds")
cd4_l3_tfh  <- readRDS("../data/processed/L3/cd4_l3_tfh.rds")
cd4_l3_th1th17  <- readRDS("../data/processed/L3/cd4_l3_th1_17.rds")
cd4_l3_nfkb  <- readRDS("../data/processed/L3/cd4_l3_nfkb.rds")
cd4_l3_th2  <- readRDS("../data/processed/L3/cd4_l3_th2.rds")
cd4_l3_isaghi  <- readRDS("../data/processed/L3/cd4_l3_isaghi.rds")
cd4_l3_proliferating  <- readRDS("../data/processed/L3/cd4_l3_proliferating.rds")

In [None]:
cd4_l3_temra  <- readRDS("../data/processed/L3/cd4_l3_temra.rds")

In [None]:
cd4_l3_treg  <- readRDS("../data/processed/L3/cd4_l3_treg.rds")

In [None]:
cd4_l2_unc  <- readRDS("../data/processed/L2/cd4_l2_unc.rds")

In [None]:
cd4_l2_subcluster  <- readRDS("../data/processed/L2/cd4_subcluster.rds")

In [None]:
cd4_l1_full_filt  <- readRDS("../data/processed/L1/cd4_l1_full_filt.rds")

In [None]:
cd4_l1_full_filt@meta.data  %>% group_by(Patient_ID, Experiment_ID, Disease)  %>% tally  %>% dplyr::select(-n)  %>% 
ungroup  %>% group_by(Experiment_ID, Disease)  %>% tally  %>% arrange(Experiment_ID)

In [None]:
cd4_l1_full_filt

In [None]:
# Collect the loaded CD4 objects in one list, each element keyed by the name of
# the variable it came from.
cd4_l3_list <- list(
  cd4_l3_naive = cd4_l3_naive,
  cd4_l3_tfh = cd4_l3_tfh,
  cd4_l3_th1th17 = cd4_l3_th1th17,
  cd4_l3_nfkb = cd4_l3_nfkb,
  cd4_l3_th2 = cd4_l3_th2,
  cd4_l3_treg = cd4_l3_treg,
  cd4_l3_isaghi = cd4_l3_isaghi,
  cd4_l3_proliferating = cd4_l3_proliferating,
  cd4_l3_temra = cd4_l3_temra,
  cd4_l2_unc = cd4_l2_unc,
  cd4_l2_subcluster = cd4_l2_subcluster,
  cd4_l1_full_filt = cd4_l1_full_filt
)

# Plot of Unconventional populations

In [None]:
DimPlot(cd4_l1_full_filt, cols = c("grey88","brown3"), pt.size = 2, raster = T) + NoLegend()

In [None]:
# Save the last displayed plot as PNG and SVG. `create.dir = TRUE` creates the
# output folder on first use, as the later ggsave() calls in this file do.
ggsave(filename = "../figures/Dimplots_unconventional/unconventional_cd4.png", create.dir = TRUE, width = 12, height = 12, units = "cm")
ggsave(filename = "../figures/Dimplots_unconventional/unconventional_cd4.svg", create.dir = TRUE, width = 12, height = 12, units = "cm")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
FeaturePlot(cd4_l1_full_filt, features = "PTPRC-RA", max.cutoff = 5)

In [None]:
cd4_l2_subcluster@meta.data  %>% group_by(Patient_ID, Condition)  %>% tally  %>% dplyr::select(-n)  %>% 
group_by(Condition)  %>% tally

In [None]:
DefaultAssay(cd4_l1_full_filt)  <- "RNA"

In [None]:
mrk  <- FindAllMarkers(cd4_l1_full_filt, only.pos = T)

In [None]:
mrk  %>% filter(cluster == "Unconventional T cells")

In [None]:
markers2  <- rev(c("CD4","TRAC",
              "TRDC","CD79A","ZBTB16","CR1","FAM13A",
                   "PTGER2", "PRKD3","ADAM23","AUTS2","ANXA1","RTKN2",
                   "ZNF462","NCR3","CYB561","LTB","PTGDR","LY6E","TRGC1"
))

avgexp = AverageExpression(cd4_l1_full_filt, features = markers2,
                           return.seurat = F, group.by = "annotations_l2", 
                          assay = "RNA")

In [None]:
options(repr.plot.width = 8, repr.plot.height = 3.7)
pheatmap(t(avgexp$RNA)[c(10,5,7,8,4,9,1,2,3,6),], main = "", 
         scale = "column", cluster_cols = T, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white",
         width = 8, height = 3,
                  fontsize = 9)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 3.7)
# pheatmap() writes straight to `filename`, so the target folder must exist
# before the call.
dir.create("../figures/heatmaps", recursive = TRUE, showWarnings = FALSE)
# Rows (groups) are reordered by position.
# NOTE(review): the hard-coded order assumes exactly 10 groups in
# `annotations_l2` - confirm if the annotation changes.
pheatmap(t(avgexp$RNA)[c(10, 5, 7, 8, 4, 9, 1, 2, 3, 6), ], main = "",
         scale = "column", cluster_cols = TRUE, cluster_rows = FALSE,
         color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
         border_color = "white",
         filename = "../figures/heatmaps/heatmap_cd4_unconventional.pdf",
         width = 8, height = 3,
         fontsize = 9)

In [None]:
cd4_l1_full_filt

In [None]:
DefaultAssay(cd4_l1_full_filt)  <- "CollecTRI"

In [None]:
Idents(cd4_l1_full_filt)  <- cd4_l1_full_filt$annotations_manual

In [None]:
mrk  <- FindAllMarkers(cd4_l1_full_filt, only.pos = T)

In [None]:
mrk  %>% filter(cluster == "Unconventional T cells")

In [None]:
markers2  <- rev(c("ASCL2", "ETV5", "SETBP1", 
              "POU3F1","SMARCA4","FOXH1",
                  "DACH1","CREB3", "ARNT",
                   "MECOM","RUNX1",
                  "ELF2","NEUROD1","EBF1",
                  "ID2","STAT5A","PURA","GTF2I","STAT3","STAT6"                 
))

avgexp = AverageExpression(cd4_l1_full_filt, features = markers2,
                           return.seurat = F, group.by = "annotations_l2", 
                          assay = "CollecTRI")

In [None]:
options(repr.plot.width = 7, repr.plot.height = 3.7)
pheatmap(t(avgexp$CollecTRI)[c(10,5,7,8,4,9,1,2,3,6),], main = "", 
         scale = "column", cluster_cols = T, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white",
                  fontsize = 9)

In [None]:
options(repr.plot.width = 7, repr.plot.height = 3.7)
# pheatmap() writes straight to `filename`, so the target folder must exist
# before the call.
dir.create("../figures/heatmaps", recursive = TRUE, showWarnings = FALSE)
# Rows (groups) are reordered by position.
# NOTE(review): the hard-coded order assumes exactly 10 groups in
# `annotations_l2` - confirm if the annotation changes.
pheatmap(t(avgexp$CollecTRI)[c(10, 5, 7, 8, 4, 9, 1, 2, 3, 6), ], main = "",
         scale = "column", cluster_cols = TRUE, cluster_rows = FALSE,
         color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
         border_color = "white",
         filename = "../figures/heatmaps/heatmap_cd4_unconventional_collecTRI.pdf",
         width = 8, height = 3,
         fontsize = 9)

In [None]:
cd4_l1_full_filt$v_gene_A1  %>% table

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
# Highlight cells with v_gene_A1 == "TRAV10" and j_gene_A1 == "TRAJ18".
# which() drops cells whose V/J annotation is NA; a bare logical index would
# turn each of them into an NA entry in `cells.highlight`.
DimPlot(cd4_l1_full_filt,
        cells.highlight = colnames(cd4_l1_full_filt)[
          which(cd4_l1_full_filt$v_gene_A1 == "TRAV10" &
                  cd4_l1_full_filt$j_gene_A1 == "TRAJ18")
        ]) + NoLegend()

In [None]:
ggsave(filename = paste0(paste0("../figures/Dimplots_unconventional/trav10_traj18_cd4.png")), create.dir = TRUE, width = 12, height = 12, units = "cm")
ggsave(filename = paste0(paste0("../figures/Dimplots_unconventional/trav10_traj18_cd4.svg")), width = 12, height = 12, units = "cm")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
FeaturePlot(cd4_l1_full_filt, features = "ZBTB16", min.cutoff = 0, max.cutoff = 2, raster = T, pt.size = 2,
           raster.dpi = c(900,900))

In [None]:
ggsave(filename = paste0(paste0("../figures/Dimplots_unconventional/zbtb16.png")), create.dir = TRUE, width = 14, height = 12, units = "cm")
ggsave(filename = paste0(paste0("../figures/Dimplots_unconventional/zbtb16.svg")), width = 14, height = 12, units = "cm")

In [None]:
cd4_l2_subcluster

In [None]:
# `cd4_l2_subcluster` is still used by the cells that follow (FeaturePlot,
# FindAllMarkers, AverageExpression, create_df4), so removing it here breaks a
# top-to-bottom run. It is also still referenced from `cd4_l3_list`, so rm()
# would not release its memory anyway.
# rm(cd4_l2_subcluster)

In [None]:
gc()

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
FeaturePlot(cd4_l2_subcluster, features = c("PTPRC-RA"), cols = c("grey88","grey88","blue"), max.cutoff = 2,
        raster = TRUE, raster.dpi = c(900,900), pt.size = 4) + ggtheme()
#ggsave("../figures/prelim/cd4_ptprc_ra.svg", width = 13, height = 12, units = "cm", create.dir = TRUE)

## Heatmaps of subsets

In [None]:
Idents(cd4_l2_subcluster)  <- cd4_l2_subcluster$annotations_manual

mrk  <- FindAllMarkers(cd4_l2_subcluster, only.pos = TRUE)

mrk  <- rank_score_func(mrk)

markers  <- mrk  %>% arrange(desc(score))  %>% group_by(cluster)  %>% slice_head(n = 8)  %>% pull(gene)

In [None]:
avgexp = AverageExpression(cd4_l2_subcluster, features = markers,
                           return.seurat = F, group.by = "annotations_manual", 
                          assay = "RNA")

avgexp$RNA

In [None]:
options(repr.plot.width = 14, repr.plot.height = 3.7)
pheatmap(t(avgexp$RNA), main = "", 
         scale = "column", cluster_cols = T, cluster_rows = T,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white",
                  fontsize = 9)

In [None]:
markers2  <- rev(c("SELL", "CCR7", "LEF1", "TCF7", 
              "CXCR5", "GPR183", "PRKCB","PPP2R5C",
              "CD69","TNFAIP3","JUN","FOS",
              "ISG15", "IFIT1", "OAS3",
              "FOXP3", "IKZF2","IL2RA",
              "GATA3", "IL13","NEFL","PLP2","IL4", "ZBTB16",
              "CCR6", "RORC" ,
              "ITGB1","S100A4","S100A6",
                   "IL23R","KLRB1","CCL5", "TBX21", "IFNG",
               "MKI67", "PCNA", "MCM6" ))

In [None]:
avgexp = AverageExpression(cd4_l2_subcluster, features = markers2,
                           return.seurat = F, group.by = "annotations_manual", 
                          assay = "RNA")

In [None]:
avgexp$RNA

In [None]:
options(repr.plot.width = 8, repr.plot.height = 3.7)
pheatmap(t(avgexp$RNA[,c(2,1,7,6,4,3,5,9,8)]), main = "", 
         scale = "column", cluster_cols = F, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white",
                  fontsize = 9)

In [None]:
# Create the heatmap output folder (and any missing parents); stay silent when
# it already exists so the cell can be re-run.
dir.create("../figures/heatmaps/", recursive = TRUE, showWarnings = FALSE)

In [None]:
options(repr.plot.width = 8, repr.plot.height = 3.7)
pheatmap(t(avgexp$RNA[,c(2,1,7,6,4,3,5,9,8)]), main = "", 
         scale = "column", cluster_cols = F, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", filename = "../figures/heatmaps/heatmap_cd4_l2.pdf",
         width = 8, height = 3,
                  fontsize = 9)

# Dorothea and CollecTRI

## Heatmap of all subsets

In [None]:
DefaultAssay(cd4_l2_subcluster)  <- "CollecTRI"

In [None]:
Idents(cd4_l2_subcluster)  <- cd4_l2_subcluster$annotations_manual

In [None]:
coll  <- FindAllMarkers(cd4_l2_subcluster, only.pos = T)

In [None]:
genes  <- coll  %>% group_by(cluster)  %>% slice_head(n = 10)  %>% pull(gene)

In [None]:
# The target is nested two levels deep; without `recursive = TRUE` dir.create()
# fails when "../tables/cd4" does not exist yet and the write.csv() below errors.
dir.create("../tables/cd4/collectri_annotations/", recursive = TRUE, showWarnings = FALSE)

In [None]:
write.csv(coll, file = "../tables/cd4/collectri_annotations/cd4_l2_subcluster.csv")

In [None]:
coll  %>% group_by(cluster)  %>% slice_head(n = 20 )  %>% dplyr::filter(cluster == "Proliferating")

In [None]:
avgexp = AverageExpression(cd4_l2_subcluster, features = genes,
                           return.seurat = F, group.by = "annotations_manual", 
                          assay = "CollecTRI")

avgexp$CollecTRI

In [None]:
options(repr.plot.width = 16, repr.plot.height = 3.7)
pheatmap(t(avgexp$CollecTRI[,c(2,1,7,6,4,3,5,9,8)]), main = "", 
         scale = "column", cluster_cols = T, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white",
                  fontsize = 9)

In [None]:
genes2  <- rev(c("TBX18", "PRRX1", "PRDM4", 
                 "ID1","ETV7","PROX1",
                    "NFATC3","ELK4","ATF1",
                 "STAT1","STAT2","IRF3","IRF1",
             "IKZF2","BCL11B","KAT2B", "FOXP3",
              "KLF12","MAF","MYB","ZBTB4",
                 "RELB","RORC","PAX6","IRF6",
             "NFKBIB","SPIC","EOMES",
                "RFXAP","CIITA","NFKB","TP53",
             "E2F4","HES6","HEYL"))

In [None]:
avgexp = AverageExpression(cd4_l2_subcluster, features = genes2,
                           return.seurat = F, group.by = "annotations_manual", 
                          assay = "CollecTRI")

avgexp$CollecTRI

In [None]:
options(repr.plot.width = 8, repr.plot.height = 3.7)
pheatmap(t(avgexp$CollecTRI[,c(2,1,7,6,4,3,5,9,8)]), main = "", 
         scale = "column", cluster_cols = F, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white",
                  fontsize = 9)

In [None]:
pheatmap(t(avgexp$CollecTRI[,c(2,1,7,6,4,3,5,9,8)]), main = "", 
         scale = "column", cluster_cols = F, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", filename = "../figures/heatmaps/heatmap_cd4_l2_collectri.pdf",
         width = 8, height = 3,
                  fontsize = 9)

# Frequencies L2

In [None]:
df4  <- create_df4(cd4_l2_subcluster)

In [None]:
df4

In [None]:
 # Violin + beeswarm plot of `freq` (as percent) per Condition for experiments
# Exp16/18/19/20, one facet per `annotations_manual` value; groups 1 and 2 on
# the x axis are compared with stat_compare_means().
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  ggplot(aes(x = Condition, y = freq * 100)) + # x can be any grouping variable of interest
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(fun = "median",
               geom = "crossbar",
               width = 0.75,
               color = "grey30") +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(size = 3, cex = 3,
                color = "black", method = "center",
                aes(fill = Condition), shape = 21) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(label.x = 1.5,
                             size = 7, vjust = -1, label = "p.format",
                             comparisons = list(c(1, 2))) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(axis.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 90),
        # `linewidth` replaces `size`, deprecated for lines since ggplot2 3.4.0
        axis.line = element_line(color = "black", linewidth = 0.5),
        axis.ticks.x = element_blank()) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct subsets; not used by the plot above (ncol is fixed at 9).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 20, repr.plot.height = 6)
p5

In [None]:
ggsave("../figures/subset_characterization/cd4_subsets_in_condition.svg",
       width = 40, height = 12, units = "cm")

In [None]:
    # Same plot as the unpaired version, excluding Patient_ID "116" and
# comparing x-axis groups 2 and 3 with a paired test.
# NOTE(review): `paired = TRUE` pairs observations by row order within each
# group - confirm that df4 lists patients in the same order for both groups.
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  dplyr::filter(Patient_ID != "116") %>%
  ggplot(aes(x = Condition, y = freq * 100)) + # x can be any grouping variable of interest
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(fun = "median",
               geom = "crossbar",
               width = 0.75,
               color = "grey30") +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(size = 3, cex = 3,
                color = "black", method = "center",
                aes(fill = Condition), shape = 21) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(label.x = 1.5,
                             size = 7, vjust = -1, label = "p.format",
                             comparisons = list(c(2, 3)), paired = TRUE) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(axis.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 90),
        # `linewidth` replaces `size`, deprecated for lines since ggplot2 3.4.0
        axis.line = element_line(color = "black", linewidth = 0.5),
        axis.ticks.x = element_blank()) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct subsets; not used by the plot above (ncol is fixed at 9).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 30, repr.plot.height = 7)
p5

### Unconventional and L2

In [None]:
df4  <- create_df4(cd4_l1_full_filt)

In [None]:
df4

In [None]:
 # Violin + beeswarm plot of `freq` (as percent) per Condition for experiments
# Exp16/18/19/20, one facet per `annotations_manual` value; groups 1 and 2 on
# the x axis are compared with stat_compare_means().
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  ggplot(aes(x = Condition, y = freq * 100)) + # x can be any grouping variable of interest
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(fun = "median",
               geom = "crossbar",
               width = 0.75,
               color = "grey30") +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(size = 3, cex = 3,
                color = "black", method = "center",
                aes(fill = Condition), shape = 21) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(label.x = 1.5,
                             size = 7, vjust = -1, label = "p.format",
                             comparisons = list(c(1, 2))) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(axis.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 90),
        # `linewidth` replaces `size`, deprecated for lines since ggplot2 3.4.0
        axis.line = element_line(color = "black", linewidth = 0.5),
        axis.ticks.x = element_blank()) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct subsets; not used by the plot above (ncol is fixed at 9).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

In [None]:
ggsave("../figures/subset_characterization/cd4_unc_subsets_in_condition.svg",
       width = 10, height = 12, units = "cm")

In [None]:
    # Same plot as the unpaired version, excluding Patient_ID "116" and
# comparing x-axis groups 2 and 3 with a paired test.
# NOTE(review): `paired = TRUE` pairs observations by row order within each
# group - confirm that df4 lists patients in the same order for both groups.
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  dplyr::filter(Patient_ID != "116") %>%
  ggplot(aes(x = Condition, y = freq * 100)) + # x can be any grouping variable of interest
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(fun = "median",
               geom = "crossbar",
               width = 0.75,
               color = "grey30") +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  geom_beeswarm(size = 3, cex = 3,
                color = "black", method = "center",
                aes(fill = Condition), shape = 21) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(label.x = 1.5,
                             size = 7, vjust = -1, label = "p.format",
                             comparisons = list(c(2, 3)), paired = TRUE) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(axis.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 90),
        # `linewidth` replaces `size`, deprecated for lines since ggplot2 3.4.0
        axis.line = element_line(color = "black", linewidth = 0.5),
        axis.ticks.x = element_blank()) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct subsets; not used by the plot above (ncol is fixed at 9).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 9, repr.plot.height = 7)
p5

# Frequencies and counts of all populations

In [None]:
cd4_l1_full_filt

In [None]:
all_counts  <- read_csv("../tables/populations_freq/all_levels_counts_cd4.csv")

In [None]:
all_counts$Experiment_ID  %>% table

In [None]:
all_counts

In [None]:
df3  <- all_counts %>% 
  group_by(Sample_ID, Level) %>% 
  mutate(freq_from_total = n / sum(n)) 

In [None]:
df3

In [None]:
## Set parent population

In [None]:
df3  <- df3  %>% separate(annotations, into = c("annot_l1","annot_l2",NA), sep = "---", remove = F)  %>% 
mutate(Parent_annotation = case_when(
Level == "L1" ~ "CD4",
Level == "L2" ~ annot_l1,
Level == "L3" ~ paste0(annot_l1, "---" ,annot_l2)
))

In [None]:
df3

In [None]:
## For each level and each patient calculate the total count per parent population

In [None]:
levels_l1  <- all_counts %>% 
  filter(Level == "L1")  %>% 
pull(annotations)  %>% unique
levels_l2  <- all_counts %>% 
  filter(Level == "L2")  %>% 
pull(annotations)  %>% unique

In [None]:

# Per-sample total count of the children of one parent population.
#   parent_label  annotation string of the parent (an L1 or L2 annotation)
#   target_level  level of the children to add up ("L2" or "L3")
# Returns one row per Sample_ID with `total_per_patient`, `Level` and
# `Parent_annotation`. The parent label is matched as a literal substring
# (`fixed = TRUE`) so characters such as "+", "(" or "." in population names
# are not interpreted as regular-expression syntax.
sum_children_of <- function(parent_label, target_level) {
  df3 %>%
    filter(grepl(parent_label, annotations, fixed = TRUE) & Level == target_level) %>%
    group_by(Sample_ID) %>%
    summarise(total_per_patient = sum(n)) %>%
    mutate(Level = target_level, Parent_annotation = parent_label)
}

# L1 parents (children at L2) first, then L2 parents (children at L3); the
# pieces are bound once instead of growing the table with rbind() in a loop.
df_sum_of_parent <- bind_rows(
  lapply(levels_l1, sum_children_of, target_level = "L2"),
  lapply(levels_l2, sum_children_of, target_level = "L3")
)

In [None]:
df_sum_of_parent

In [None]:
dim(df3)

In [None]:
# Attach the per-sample parent totals. The keys are spelled out so the join
# cannot silently pick up another shared column name.
df4 <- left_join(df3, df_sum_of_parent,
                 by = c("Sample_ID", "Level", "Parent_annotation"))

In [None]:
dim(df4)

In [None]:
df4

In [None]:
df4$freq_from_parent  <- df4$n/df4$total_per_patient

In [None]:
df4

In [None]:
df4$pct_from_total  <- df4$freq_from_total*100
df4$pct_from_parent  <- df4$freq_from_parent*100

In [None]:
# Below L1, a missing frequency/percentage from parent is set to 0;
# L1 rows keep their NA.
df4$freq_from_parent <- replace(
  df4$freq_from_parent,
  which(is.na(df4$freq_from_parent) & df4$Level != "L1"),
  0
)
df4$pct_from_parent <- replace(
  df4$pct_from_parent,
  which(is.na(df4$pct_from_parent) & df4$Level != "L1"),
  0
)

## Ordered frequency plots

In [None]:
# NOTE(review): this cell is a chain of ggplot2 layers with no ggplot() object
# at its head, and it references `intra02_All2` and `i`, which are not defined
# in the visible part of this file. It looks like a leftover template for the
# plots in the following cells - confirm whether it can be removed.
geom_violin(alpha = 0.3, aes(fill = Condition), scale = "width") + 
stat_summary(fun = "median",
               geom = "crossbar", 
               width = 0.75,
               color = "grey30") +
geom_beeswarm(size = 3, aes(fill = Condition), cex = 3, 
                shape = 21, color = "black", corral = "random") +
 stat_compare_means(label = "p.format")+
scale_fill_manual(values = c("#1874cdff",  "#ee6363ff", "#c41515ff","#eeb4b4ff")) +
scale_color_manual(values = c("dodgerblue3",  "#e54c4cff", "#aa2a2aff","#e88989ff")) +
ylab("") +
  xlab("") +
  ylim(0,NA) +
  theme_classic() +
theme(strip.background = element_blank(), panel.grid = element_blank()) + 
  theme(axis.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 90),
       axis.line = element_line(color = "black", size = 0.5),
        axis.ticks.x = element_blank()) +
  ylim(0,NA)+
  plot_annotation(
    title = paste(stringr::str_replace_all(string = colnames(intra02_All2)[i], pattern = "/", replacement = "\n")) ,
    theme = theme(plot.title = element_text(size = 8), 
                  plot.subtitle = element_text(size = 8),
                 axis.text.x = element_blank())) + ggtheme() + NoLegend()

In [None]:
options(repr.plot.width = 20, repr.plot.height = 28)

# Violin + beeswarm plot of `pct_from_parent` per Condition, one facet per
# annotation; the "---" separator in each facet label is replaced by a line
# break. x-axis groups 1 vs 2 and 2 vs 3 are compared with stat_compare_means().
df4   %>% 
ggplot(aes(x = Condition,
             y = pct_from_parent)) +
geom_violin(alpha = 0.3, aes(fill = Condition), scale = "width") + 
stat_summary(fun = "median",
               geom = "crossbar", 
               width = 0.75,
               color = "grey30") +
geom_beeswarm(size = 3, aes(fill = Condition), cex = 3, 
                shape = 21, color = "black", corral = "random") +
# stat_compare_means(label = "p.format")+
scale_fill_manual(values = c("#1874cdff",  "#ee6363ff", "#c41515ff","#eeb4b4ff")) +
scale_color_manual(values = c("dodgerblue3",  "#e54c4cff", "#aa2a2aff","#e88989ff")) +
ylab("") +
  xlab("") +
  ylim(0,NA) +
facet_wrap(~factor(annotations, labels = gsub(levels(factor(annotations)), 
                                              pattern = "---", replacement = "\n")),
           scales = "free", ncol = 6) +
ggpubr::stat_compare_means(label.x = 1.2, label.y.npc = "top", comparisons = list(c(1,2),
                                                                                  c(2,3)),
                           size = 3, vjust = 0.3, label = "p.format",
                          ) + 
      ggtitle("CD4 Pct from parent") +
 # The theme calls are applied left to right, so later ones override earlier ones.
 theme_classic() + ggtheme() + NoLegend() + theme(plot.title = element_text(hjust = 0.5, size = 22),
          axis.line = element_line(colour = "black"), 
        axis.ticks = element_line(colour = "black"),
           axis.ticks.x = element_blank(),
           axis.text.x = element_blank())


In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Box plots of `pct_from_parent` per Condition with the individual points
# overlaid, one facet per annotation ("---" in the label becomes a line break).
df4 %>%
  ggplot(aes(x = Condition, y = pct_from_parent)) +
  # NOTE(review): dotsize = 0 draws nothing visible; kept in case the layer is
  # relied on elsewhere - confirm whether it can be dropped.
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  # `binaxis` and `stackdir` are geom_dotplot() arguments; geom_jitter() only
  # warned about them as unknown parameters, so they are not passed here.
  geom_jitter(position = position_jitter(width = 0, height = 0),
              size = 1, aes(color = Condition)) +
  theme_classic() + xlab("") + ylab("Value") +
  facet_wrap(~factor(annotations, labels = gsub(levels(factor(annotations)),
                                                pattern = "---", replacement = "\n")),
             scales = "free", ncol = 6) +
  ylim(0, NA) +
  ggpubr::stat_compare_means(label.x = 1.2, label.y.npc = "top", size = 3,
                             vjust = 0.3, label = "p.format") +
  theme(plot.title = element_text(hjust = 0.5, size = 22),
        axis.line = element_line(colour = "black"),
        axis.ticks = element_line(colour = "black")) +
  ggtitle("CD4 Pct from parent")

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Box plots of `pct_from_total` per Condition with the individual points
# overlaid, one facet per annotation ("---" in the label becomes a line break).
df4 %>%
  ggplot(aes(x = Condition, y = pct_from_total)) +
  # NOTE(review): dotsize = 0 draws nothing visible; kept in case the layer is
  # relied on elsewhere - confirm whether it can be dropped.
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  # `binaxis` and `stackdir` are geom_dotplot() arguments; geom_jitter() only
  # warned about them as unknown parameters, so they are not passed here.
  geom_jitter(position = position_jitter(width = 0, height = 0),
              size = 1, aes(color = Condition)) +
  theme_classic() + xlab("") + ylab("Value") +
  facet_wrap(~factor(annotations, labels = gsub(levels(factor(annotations)),
                                                pattern = "---", replacement = "\n")),
             scales = "free", ncol = 6) +
  ylim(0, NA) +
  ggpubr::stat_compare_means(label.x = 1.2, label.y.npc = "top", size = 3,
                             vjust = 0.3, label = "p.format") +
  theme(plot.title = element_text(hjust = 0.5, size = 22),
        axis.line = element_line(colour = "black"),
        axis.ticks = element_line(colour = "black")) +
  ggtitle("CD4 Pct from total")

In [None]:
options(repr.plot.width = 20, repr.plot.height = 28)

# Violin + beeswarm plot of `pct_from_total` per Condition, one facet per
# annotation ("---" in the label becomes a line break). x-axis groups 1 vs 2
# and 2 vs 3 are compared with stat_compare_means().
df4 %>%
  ggplot(aes(x = Condition, y = pct_from_total)) +
  geom_violin(alpha = 0.3, aes(fill = Condition), scale = "width") +
  stat_summary(fun = "median",
               geom = "crossbar",
               width = 0.75,
               color = "grey30") +
  geom_beeswarm(size = 2, aes(fill = Condition), cex = 3,
                shape = 21, color = "black", corral = "random") +
  scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
  scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
  ylab("") +
  xlab("") +
  ylim(0, NA) +
  facet_wrap(~factor(annotations, labels = gsub(levels(factor(annotations)),
                                                pattern = "---", replacement = "\n")),
             scales = "free", ncol = 6) +
  ggpubr::stat_compare_means(label.x = 1.2, label.y.npc = "top",
                             comparisons = list(c(1, 2), c(2, 3)),
                             size = 3, vjust = 0.3, label = "p.format") +
  # The y variable is pct_from_total (and the figure is saved as
  # cd4_from_total.svg), so the title must not say "from parent".
  ggtitle("CD4 Pct from total") +
  theme_classic() + ggtheme() + NoLegend() +
  theme(plot.title = element_text(hjust = 0.5, size = 22),
        axis.line = element_line(colour = "black"),
        axis.ticks = element_line(colour = "black"),
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank())


In [None]:
# Create the output folder (and any missing parents); stay silent when it
# already exists so the cell can be re-run.
dir.create("../figures/populations/", recursive = TRUE, showWarnings = FALSE)

In [None]:
library(svglite)
ggsave("../figures/populations/cd4_from_total.svg", width = 40, height = 68, units = "cm")

## Plot of differences in Conditions

In [None]:
annotations_to_test  <- df4$annotations  %>% unique()
comparisons_to_test  <- c("Ctrl T0", "Dia T0", "Dia T1")

In [None]:
annotations_to_test

### Freq from total

In [None]:
# Compare `value_col` between the two groups found in `group_col` with a
# Wilcoxon rank-sum test and return a one-row summary data frame.
#   data               rows of one annotation, already restricted to two groups
#   group_col          name of the grouping column ("Condition" or "Condition2")
#   name, comparison   labels copied into the output row
#   dia_idx, ctrl_idx  positions, in the order returned by
#                      group_by() %>% summarise(), of the group means reported
#                      as `mean_dia` and `mean_ctrl`
#   value_col          name of the column holding the values to compare
# `estimate`, `lower` and `upper` are the location-shift estimate and its
# confidence interval as returned by wilcox.test(conf.int = TRUE).
# If wilcox.test() fails (for example because one group is empty) a warning is
# raised and the test columns are NA, so one annotation cannot abort the table.
compare_two_groups <- function(data, group_col, name, comparison,
                               dia_idx, ctrl_idx,
                               value_col = "pct_from_total") {
  df <- data %>%
    ungroup() %>%
    dplyr::select(Condition = all_of(group_col), value = all_of(value_col)) %>%
    mutate(value = as.numeric(value))

  wcx <- tryCatch(
    wilcox.test(df$value ~ df$Condition, conf.int = TRUE),
    error = function(e) {
      warning("Wilcoxon test skipped for '", name, "' (", comparison, "): ",
              conditionMessage(e), call. = FALSE)
      list(estimate = NA_real_, p.value = NA_real_,
           conf.int = c(NA_real_, NA_real_))
    }
  )

  group_means <- df %>%
    group_by(Condition) %>%
    summarise(mean = mean(value))

  data.frame(
    name = name,
    comparison = comparison,
    estimate = wcx$estimate,
    pval = wcx$p.value,
    mean_dia = group_means$mean[dia_idx],
    mean_ctrl = group_means$mean[ctrl_idx],
    upper = wcx$conf.int[2],
    lower = wcx$conf.int[1]
  ) %>%
    mutate(ratio = mean_dia / mean_ctrl)
}

# Four comparisons per annotation, in this order:
#   Dia T0 vs Dia T1, Dia T0 vs Ctrl T0, Dia T1 vs Ctrl T0, PR_0 vs PR_1.
comparison_rows <- lapply(annotations_to_test, function(current_annotation) {
  df_annot <- df4 %>% filter(annotations == current_annotation)
  list(
    ## Comparison Dia T0 vs Dia T1
    compare_two_groups(
      df_annot %>% filter(Condition %in% comparisons_to_test[c(2, 3)]),
      "Condition", current_annotation, "Dia T0 vs Dia T1",
      dia_idx = 1, ctrl_idx = 2
    ),
    ## Comparison of Dia vs controls at both time points
    compare_two_groups(
      df_annot %>% filter(Condition %in% comparisons_to_test[c(2, 1)]),
      "Condition", current_annotation,
      paste(comparisons_to_test[2], "vs", comparisons_to_test[1]),
      dia_idx = 2, ctrl_idx = 1
    ),
    compare_two_groups(
      df_annot %>% filter(Condition %in% comparisons_to_test[c(3, 1)]),
      "Condition", current_annotation,
      paste(comparisons_to_test[3], "vs", comparisons_to_test[1]),
      dia_idx = 2, ctrl_idx = 1
    ),
    ## Comparison of Dia - partial remission vs. no remission
    compare_two_groups(
      df_annot %>% filter(Disease == "Dia" & !is.na(Condition2)),
      "Condition2", current_annotation, "PR_0 vs PR_1",
      dia_idx = 1, ctrl_idx = 2
    )
  )
})

# Flatten to a single list of one-row data frames and bind them in one call
# instead of growing `df_final` with rbind() inside a loop.
df_final <- do.call(rbind, unlist(comparison_rows, recursive = FALSE))

In [None]:
df_final  %>% arrange(pval)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)
# Forest plot of the wilcox.test() location-shift estimates and their
# confidence intervals, one panel per comparison.
# Colour classes: "1" = estimate and upper bound below 0,
#                 "2" = estimate and lower bound above 0, "3" = everything else.
df_final %>%
  mutate(color = ifelse(estimate < 0 & upper < 0,
                        "1",
                        ifelse(estimate > 0 & lower > 0, "2", "3"))) %>%
  ggplot(aes(estimate, name, color = color)) +
  geom_vline(xintercept = 0, color = "gray75") +
  # `linewidth` replaces `size`, deprecated for lines since ggplot2 3.4.0
  geom_linerange(aes(xmin = lower, xmax = upper),
                 linewidth = 1.5,
                 alpha = 0.5) +
  geom_point(size = 4) +
  theme_minimal(base_size = 16) +
  # Named values pin each class to its colour; unnamed values are assigned in
  # order to whichever classes are present, so a missing class shifts them.
  scale_color_manual(values = c("1" = "green4", "2" = "red3", "3" = "grey"),
                     guide = "none") +
  facet_wrap(~comparison, ncol = 4) +
  # The plotted estimate is a difference in location, not a probability.
  labs(title = "", y = NULL,
       x = "Difference in location\n(95% confidence interval)") +
  theme(axis.text.y = element_text(hjust = 0, size = 18),
        panel.grid = element_blank()) +
  ggtitle("Pct from Total")

In [None]:
write.csv(df_final, "../tables/populations_freq/cd4_pct_from_total.csv")

### Freq from parent

In [None]:
# Distinct annotation labels in df4, minus the first two.
# Negative indexing is used instead of 3:length(x): with fewer than three
# labels 3:length(x) counts backwards and would return NA / reordered entries.
annotations_to_test <- df4$annotations %>% unique()
annotations_to_test <- annotations_to_test[-(1:2)]

In [None]:
df5 <- df4 %>% filter(Level != "L1")

# Run one two-group Wilcoxon test on `values` (columns `Condition`, `value`)
# and return a one-row data frame with the test estimate, p-value, confidence
# bounds, the two group means and their ratio.
#   values     data frame restricted to the two groups being compared
#   annotation label stored in `name`
#   label      text stored in `comparison`
#   dia_pos / ctrl_pos  positions, within the per-Condition summary, of the
#              means reported as `mean_dia` and `mean_ctrl`
summarise_parent_comparison <- function(values, annotation, label,
                                        dia_pos, ctrl_pos) {
  wcx <- wilcox.test(values$value ~ values$Condition, conf.int = TRUE)
  group_means <- values %>%
    group_by(Condition) %>%
    summarise(mean = mean(value), sd = sd(value))
  data.frame(name = annotation,
             comparison = label,
             estimate = wcx$estimate,
             pval = wcx$p.value,
             mean_dia = group_means$mean[dia_pos],
             mean_ctrl = group_means$mean[ctrl_pos],
             upper = wcx$conf.int[2],
             lower = wcx$conf.int[1]) %>%
    mutate(ratio = (mean_dia) / (mean_ctrl))
}

# Condition contrasts, as indices into `comparisons_to_test`. A label of NA
# means "paste the two condition names together".
condition_contrasts <- list(
  # Dia T0 vs Dia T1
  list(j = 2, k = 3, label = "Dia T0 vs Dia T1", dia_pos = 1, ctrl_pos = 2),
  # Dia vs controls at both time points
  list(j = 2, k = 1, label = NA_character_, dia_pos = 2, ctrl_pos = 1),
  list(j = 3, k = 1, label = NA_character_, dia_pos = 2, ctrl_pos = 1)
)

# NOTE(review): annotation 10 is skipped, as in the previous version of this
# loop; the reason is not visible here. setdiff() keeps the index set valid
# when there are fewer than 11 annotations.
annotation_indices <- setdiff(seq_along(annotations_to_test), 10)

# Collect one-row results in a list and bind once at the end.
comparison_rows <- list()

for (i in annotation_indices) {
  print(i)

  for (contrast in condition_contrasts) {
    contrast_conditions <- comparisons_to_test[c(contrast$j, contrast$k)]
    contrast_label <- if (is.na(contrast$label)) {
      paste(contrast_conditions[1], "vs", contrast_conditions[2])
    } else {
      contrast$label
    }

    contrast_values <- df5 %>%
      filter(annotations == annotations_to_test[i] &
               Condition %in% contrast_conditions) %>%
      dplyr::select(Condition, value = pct_from_parent) %>%
      mutate(value = as.numeric(value))

    comparison_rows[[length(comparison_rows) + 1]] <-
      summarise_parent_comparison(contrast_values, annotations_to_test[i],
                                  contrast_label,
                                  contrast$dia_pos, contrast$ctrl_pos)
  }

  ## Comparison of Dia - partial remission vs. no remission
  remission_values <- df5 %>%
    filter(annotations == annotations_to_test[i] &
             Disease == "Dia" & !is.na(Condition2)) %>%
    dplyr::select(Condition = Condition2, value = pct_from_parent) %>%
    mutate(value = as.numeric(value))

  comparison_rows[[length(comparison_rows) + 1]] <-
    summarise_parent_comparison(remission_values, annotations_to_test[i],
                                "PR_0 vs PR_1", 1, 2)
}

df_final <- do.call(rbind, comparison_rows)

# Show the comparison table sorted by p-value.
df_final %>% arrange(pval)

options(repr.plot.width = 20, repr.plot.height = 20)

# Forest-style plot of `df_final`: point = `estimate`, range = `lower`..`upper`,
# one facet per comparison. Class "1": estimate and upper bound below zero;
# class "2": estimate and lower bound above zero; class "3": everything else.
# NOTE(review): the x axis says "Probability", but `estimate` is filled from
# wilcox.test(..., conf.int = TRUE)$estimate -- confirm the label.
df_final %>%
  mutate(color = ifelse(estimate < 0 & upper < 0,
                        "1",
                        ifelse(estimate > 0 & lower > 0, "2", "3"))) %>%
  ggplot(aes(estimate, name, color = color)) +
  geom_vline(xintercept = 0, color = "gray75") +
  geom_linerange(aes(xmin = lower, xmax = upper),
                 size = 1.5,
                 alpha = 0.5) +
  geom_point(size = 4) +
  theme_minimal(base_size = 16) +
  # Named values pin each class to its colour. An unnamed vector is matched in
  # order to the classes present, so a missing class would shift the colours.
  scale_color_manual(values = c("1" = "green4", "2" = "red3", "3" = "grey"),
                     guide = "none") +
  facet_wrap(~comparison, ncol = 4) +
  labs(title = "", y = NULL,
       x = "Probability \n(95% Confidence Intervals)") +
  theme(axis.text.y = element_text(hjust = 0, size = 18),
        panel.grid = element_blank())

# Persist the comparison table (row names are written as the first column).
write.csv(df_final, "../tables/populations_freq/cd4_pct_from_parent.csv")

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Forest-style plot of `df_final`: point = `estimate`, range = `lower`..`upper`,
# one facet per comparison. Class "1": estimate and upper bound below zero;
# class "2": estimate and lower bound above zero; class "3": everything else.
# NOTE(review): the x axis says "Probability", but `estimate` is filled from
# wilcox.test(..., conf.int = TRUE)$estimate -- confirm the label.
df_final %>%
  mutate(color = ifelse(estimate < 0 & upper < 0,
                        "1",
                        ifelse(estimate > 0 & lower > 0, "2", "3"))) %>%
  ggplot(aes(estimate, name, color = color)) +
  geom_vline(xintercept = 0, color = "gray75") +
  geom_linerange(aes(xmin = lower, xmax = upper),
                 size = 1.5,
                 alpha = 0.5) +
  geom_point(size = 4) +
  theme_minimal(base_size = 16) +
  # Named values pin each class to its colour. An unnamed vector is matched in
  # order to the classes present, so a missing class would shift the colours.
  scale_color_manual(values = c("1" = "green4", "2" = "red3", "3" = "grey"),
                     guide = "none") +
  facet_wrap(~comparison, ncol = 4) +
  labs(title = "", y = NULL,
       x = "Probability \n(95% Confidence Intervals)") +
  theme(axis.text.y = element_text(hjust = 0, size = 18),
        panel.grid = element_blank()) +
  ggtitle("Pct from Parent")

# Frequencies in youngest patients

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Box plots of pct_from_parent per Condition for rows with Age_group == 1,
# one free-scaled facet per annotation ("---" in the annotation name becomes a
# line break in the facet label), with a group-comparison p-value per facet.
df4 %>%
  filter(Age_group == 1) %>%
  ggplot(aes(x = Condition,
             y = pct_from_parent)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  # Zero-width, zero-height jitter: points sit at their exact positions.
  # geom_jitter() has no `binaxis` / `stackdir` arguments (those belong to
  # geom_dotplot()), so none are passed.
  geom_jitter(mapping = aes(color = Condition),
              position = position_jitter(width = 0, height = 0),
              size = 1) +
  theme_classic() + xlab("") + ylab("Value") +
  facet_wrap(~factor(annotations, labels = gsub(levels(factor(annotations)),
                                                pattern = "---", replacement = "\n")),
             scales = "free", ncol = 6) +
  ylim(0, NA) +
  ggpubr::stat_compare_means(label.x = 1.2, label.y.npc = "top", size = 3,
                             vjust = 0.3, label = "p.format") +
  theme(plot.title = element_text(hjust = 0.5, size = 22),
        axis.line = element_line(colour = "black"),
        axis.ticks = element_line(colour = "black")) +
  ggtitle("CD4 Pct from parent - youngest only")

In [None]:
options(repr.plot.width = 16, repr.plot.height = 20)

# Box plots of pct_from_total per Condition for rows with Age_group == 1,
# one free-scaled facet per annotation ("---" in the annotation name becomes a
# line break in the facet label), with a group-comparison p-value per facet.
df4 %>%
  filter(Age_group == 1) %>%
  ggplot(aes(x = Condition,
             y = pct_from_total)) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_boxplot(outlier.shape = NA) +
  # Zero-width, zero-height jitter: points sit at their exact positions.
  # geom_jitter() has no `binaxis` / `stackdir` arguments (those belong to
  # geom_dotplot()), so none are passed.
  geom_jitter(mapping = aes(color = Condition),
              position = position_jitter(width = 0, height = 0),
              size = 1) +
  theme_classic() + xlab("") + ylab("Value") +
  facet_wrap(~factor(annotations, labels = gsub(levels(factor(annotations)),
                                                pattern = "---", replacement = "\n")),
             scales = "free", ncol = 6) +
  ylim(0, NA) +
  ggpubr::stat_compare_means(label.x = 1.2, label.y.npc = "top", size = 3,
                             vjust = 0.3, label = "p.format") +
  theme(plot.title = element_text(hjust = 0.5, size = 22),
        axis.line = element_line(colour = "black"),
        axis.ticks = element_line(colour = "black")) +
  ggtitle("CD4 Pct from total  - youngest only")

# DE genes in CD4 populations

## Calculation of DE genes

In [None]:
# All CD4 objects used for the DE analysis, keyed by the name of the variable
# they come from (L3 subsets first, then the L2 and L1 objects).
cd4_l3_list <- list(
  cd4_l3_naive = cd4_l3_naive,
  cd4_l3_tfh = cd4_l3_tfh,
  cd4_l3_th1th17 = cd4_l3_th1th17,
  cd4_l3_nfkb = cd4_l3_nfkb,
  cd4_l3_th2 = cd4_l3_th2,
  cd4_l3_treg = cd4_l3_treg,
  cd4_l3_isaghi = cd4_l3_isaghi,
  cd4_l3_proliferating = cd4_l3_proliferating,
  cd4_l3_temra = cd4_l3_temra,
  cd4_l2_unc = cd4_l2_unc,
  cd4_l2_subcluster = cd4_l2_subcluster,
  cd4_l1_full_filt = cd4_l1_full_filt
)

In [None]:
# Lookup table Patient_ID -> Condition2 ("PR_" + part_remission_y_n), built
# from the "Dia T0" cells of the full CD4 object.
md_for_remission <- cd4_l1_full_filt@meta.data %>%
  dplyr::filter(Condition %in% c("Dia T0")) %>%
  group_by(Patient_ID, part_remission_y_n) %>%
  tally() %>%
  mutate(Condition2 = paste0("PR_", part_remission_y_n)) %>%
  dplyr::select(-n, -part_remission_y_n)

In [None]:
md_for_remission

In [None]:
# Attach the remission status to every object in cd4_l3_list as
# `Condition2` = "<PR status> <Time>".
for (i in seq_along(cd4_l3_list)) {

  # Drop any previous Condition2 so the join below adds a fresh column.
  cd4_l3_list[[i]]$Condition2 <- NULL

  cd4_l3_list[[i]]@meta.data <- cd4_l3_list[[i]]@meta.data %>%
    left_join(md_for_remission, by = "Patient_ID") %>%
    mutate(Condition2 = paste(Condition2, Time)) %>%
    # paste() turns missing values into the text "NA" (e.g. "PR_NA T0",
    # "NA T1"); any label containing "NA" is reset to a real missing value.
    mutate(Condition2 = ifelse(grepl(Condition2, pattern = "NA"), NA_character_, Condition2))

  # left_join() drops the row names; restore the cell names before the
  # metadata is read again.
  rownames(cd4_l3_list[[i]]@meta.data) <- colnames(cd4_l3_list[[i]])

  print(cd4_l3_list[[i]]$Condition2 %>% table)
}

### PR in T0 vs T1 and Ketoacidosis

In [None]:
# Lookup table Patient_ID -> Ketoacidosis, built from the "Dia T0" cells of the
# full CD4 object: "Keto_1" when ph_man is below 7.3, otherwise "Keto_0".
md_for_ketoacidosis <- cd4_l1_full_filt@meta.data %>%
  dplyr::filter(Condition %in% c("Dia T0")) %>%
  group_by(Patient_ID, ph_man) %>%
  tally() %>%
  mutate(Ketoacidosis = ifelse(ph_man < 7.3, "Keto_1", "Keto_0")) %>%
  dplyr::select(-ph_man, -n)

In [None]:
md_for_ketoacidosis

In [None]:
# Attach the ketoacidosis status to every object in cd4_l3_list and derive
# `Keto_Time` = "<Ketoacidosis> <Time>".
for (i in seq_along(cd4_l3_list)) {

  #cd4_l3_list[[i]]$Ketoacidosis  <- NULL
  # The join keys are left implicit on purpose: if this cell is re-run the
  # metadata already has a Ketoacidosis column, and it then becomes a join key.
  cd4_l3_list[[i]]@meta.data <- cd4_l3_list[[i]]@meta.data %>%
    left_join(md_for_ketoacidosis) %>%
    mutate(Keto_Time = paste(Ketoacidosis, Time)) %>%
    # paste() turns missing values into the text "NA"; any label containing
    # "NA" is reset to a real missing value.
    mutate(Keto_Time = ifelse(grepl(Keto_Time, pattern = "NA"), NA_character_, Keto_Time))

  # left_join() drops the row names; restore the cell names before the
  # metadata is read again.
  rownames(cd4_l3_list[[i]]@meta.data) <- colnames(cd4_l3_list[[i]])

  print(cd4_l3_list[[i]]$Ketoacidosis %>% table)
}

In [None]:
cd4_l3_list[[i]]$Keto_Time  %>% table

### Ketoacidosis in T0

In [None]:
# Pairwise contrasts to test; each entry is c(first group, second group).
# Create_Markers_DataFrame() reads the entry by index, switches the grouping
# column to Condition2 when the first label contains "PR" and to Keto_Time when
# it contains "Keto", and labels the result "<first> vs <second>".
Conditions  <- list(c("Dia T0", "Ctrl T0"),
                    c("Dia T0", "Dia T1"),
                    c("Dia T1", "Ctrl T0"),
                    c("PR_0 T0", "PR_1 T0"),
                    c("PR_0 T1", "PR_1 T1"),
                    c("PR_0 T0", "PR_0 T1"),
                    c("PR_1 T0", "PR_1 T1"),
                    c("Keto_1 T0", "Keto_0 T0"))

In [None]:
Conditions  %>% length

In [None]:
dataset_list  <- cd4_l3_list

In [None]:
# Single-cell marker tables (RNA and CollecTRI assays) for one pairwise
# contrast of one dataset.
#
# Reads three globals: `Conditions` (list of contrast pairs), `dataset_list`
# (named list of Seurat objects) and `i` (index of the dataset to use, set by
# the calling loop).
#
# @param j Index into `Conditions`.
# @return Data frame of FindAllMarkers() rows from both assays with the extra
#   columns `source` ("scRNAseq_RNA" / "scRNAseq_collecTRI"), `test_type`
#   ("<first> vs <second>") and `dataset`. A zero-row data frame is returned
#   when neither assay yields a marker, so bind_rows() simply skips it.
Create_Markers_DataFrame <- function(j) {
  print("#######################")
  print(i)
  print(j)

  Condition_1 <- Conditions[[j]][1]
  Condition_2 <- Conditions[[j]][2]

  seurat_obj <- dataset_list[[i]]

  # Remission and ketoacidosis contrasts use their own grouping columns.
  if (grepl(Condition_1, pattern = "PR")) {
    seurat_obj$Condition <- seurat_obj$Condition2
  }

  if (grepl(Condition_1, pattern = "Keto")) {
    seurat_obj$Condition <- seurat_obj$Keto_Time
  }

  ## Create subsetted df
  ds <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) &
                 Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20"))
  print(ds$Condition %>% table)

  # Markers of one assay, tagged with `source_label`; NULL when nothing is
  # found. Assigning a column to an empty FindAllMarkers() result would raise
  # an error, so the empty case is handled before tagging.
  find_assay_markers <- function(obj, assay, source_label) {
    DefaultAssay(obj) <- assay
    Idents(obj) <- obj$Condition
    found <- FindAllMarkers(obj, only.pos = TRUE)
    if (nrow(found) == 0) {
      return(NULL)
    }
    found$source <- source_label
    rownames(found) <- NULL
    found
  }

  ## scRNAseq - RNA
  markers_sc <- find_assay_markers(ds, "RNA", "scRNAseq_RNA")
  print(paste("DE RNA: ", NROW(markers_sc)))

  ## scRNAseq - collecTRI
  markers_sc2 <- find_assay_markers(ds, "CollecTRI", "scRNAseq_collecTRI")
  print(paste("DE CollecTRI: ", NROW(markers_sc2)))

  # rbind() ignores NULL inputs and returns NULL when both are NULL.
  markers2 <- rbind(markers_sc, markers_sc2)
  if (is.null(markers2)) {
    print("..")
    return(data.frame())
  }

  markers2$test_type <- paste(Conditions[[j]][1], "vs", Conditions[[j]][2])
  print("..")
  markers2$dataset <- names(dataset_list)[i]
  markers2
}


In [None]:
# Run every contrast in `Conditions` on every dataset and stack the results
# into `all_markers`. The loop variable must be the global `i`, because
# Create_Markers_DataFrame() reads it to pick the dataset.
markers_per_dataset <- vector("list", length(dataset_list))

for (i in seq_along(dataset_list)) {

  print("######################################################################")
  if (i > 1) {
    # Clears same-named leftovers from the workspace, if any exist.
    suppressWarnings(rm(markers_sc, markers, markers1, markers_sc2, markers_sc_predia, markers_bulk, markers_bulk2))
  }

  # One marker table per contrast, however many contrasts are defined.
  mrk <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)
  mrk <- bind_rows(mrk)

  markers_per_dataset[[i]] <- mrk
}

# Bind once instead of growing the table inside the loop.
all_markers <- do.call(rbind, markers_per_dataset)

## DESeq2

In [None]:
# Pairwise contrasts to test; each entry is c(first group, second group).
# Create_Markers_DataFrame() reads the entry by index, switches the grouping
# column to Condition2 when the first label contains "PR" and to Keto_Time when
# it contains "Keto", and labels the result "<first> vs <second>".
Conditions  <- list(c("Dia T0", "Ctrl T0"),
                    c("Dia T0", "Dia T1"),
                    c("Dia T1", "Ctrl T0"),
                    c("PR_0 T0", "PR_1 T0"),
                    c("PR_0 T1", "PR_1 T1"),
                    c("PR_0 T0", "PR_0 T1"),
                    c("PR_1 T0", "PR_1 T1"),
                    c("Keto_1 T0", "Keto_0 T0"))

# Number of contrasts defined above.
Conditions  %>% length

# Datasets the marker functions iterate over (they index this list by position).
dataset_list  <- cd4_l3_list

# Function to calculate markers for a subsetted dataset for given conditions

In [None]:
# Pseudobulk DESeq2 marker table for one pairwise contrast of one dataset.
#
# Reads three globals: `Conditions` (list of contrast pairs), `dataset_list`
# (named list of Seurat objects) and `i` (index of the dataset to use, set by
# the calling loop).
#
# @param j Index into `Conditions`.
# @return Data frame of FindAllMarkers(test.use = "DESeq2") rows, or a single
#   all-NA placeholder row when nothing is found, with the extra columns
#   `source` ("DESeq2"), `test_type` ("<first> vs <second>") and `dataset`.
Create_Markers_DataFrame <- function(j) {
  print("#######################")
  print(i)
  print(j)

  Condition_1 <- Conditions[[j]][1]
  Condition_2 <- Conditions[[j]][2]

  seurat_obj <- dataset_list[[i]]

  # Remission and ketoacidosis contrasts use their own grouping columns.
  if (grepl(pattern = "PR", x = Condition_1)) {
    seurat_obj$Condition <- seurat_obj$Condition2
  }
  if (grepl(pattern = "Keto", x = Condition_1)) {
    seurat_obj$Condition <- seurat_obj$Keto_Time
  }

  # Keep only the two groups of this contrast, from the four listed experiments.
  ds <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) &
                 Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20"))
  print(ds$Condition %>% table)

  # One metadata row per sample, to re-annotate the aggregated object.
  sample_info <- ds@meta.data %>%
    dplyr::select(Sample_ID, Patient_ID, Condition, Disease, Time, Experiment_ID) %>%
    unique() %>%
    ungroup()

  # Aggregate counts per Sample_ID; the aggregated column names are turned
  # back into numeric Sample_IDs so they can be matched to `sample_info`.
  pseudobulk <- AggregateExpression(ds,
                                    return.seurat = TRUE,
                                    group.by = "Sample_ID",
                                    assay = "RNA",
                                    slot = "counts")
  pseudobulk$Sample_ID <- as.numeric(colnames(pseudobulk))
  pseudobulk@meta.data <- left_join(pseudobulk@meta.data, sample_info)
  rownames(pseudobulk@meta.data) <- colnames(pseudobulk)

  Idents(pseudobulk) <- pseudobulk$Condition

  deseq_hits <- FindAllMarkers(pseudobulk,
                               min.pct = 0.05,
                               test.use = "DESeq2",
                               only.pos = TRUE)

  print(paste("DE RNA: ", nrow(deseq_hits)))

  # Fall back to a placeholder row so the contrast still appears in the output.
  markers2 <- if (nrow(deseq_hits) > 0) {
    deseq_hits
  } else {
    data.frame(p_val = NA_integer_, avg_log2FC = NA_integer_, pct.1 = NA_integer_,
               pct.2 = NA_integer_, p_val_adj = NA_integer_, cluster = "", gene = "")
  }

  markers2$source <- "DESeq2"
  rownames(markers2) <- NULL
  markers2$test_type <- paste(Conditions[[j]][1], "vs", Conditions[[j]][2])
  print("..")
  markers2$dataset <- names(dataset_list)[i]
  markers2
}

In [None]:
# Run every contrast in `Conditions` on every dataset with whichever
# Create_Markers_DataFrame() is currently defined, and stack the results into
# `all_markers`. The loop variable must be the global `i`, because
# Create_Markers_DataFrame() reads it to pick the dataset.
# NOTE(review): this assigns `all_markers`, the same variable the single-cell
# marker loop fills -- confirm which table later cells are meant to use.
markers_per_dataset <- vector("list", length(dataset_list))

for (i in seq_along(dataset_list)) {

  print("######################################################################")
  if (i > 1) {
    # Clears same-named leftovers from the workspace, if any exist.
    suppressWarnings(rm(markers_sc, markers, markers1, markers_sc2, markers_sc_predia, markers_bulk, markers_bulk2))
  }

  # One marker table per contrast, however many contrasts are defined.
  mrk <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)
  mrk <- bind_rows(mrk)

  markers_per_dataset[[i]] <- mrk
}

# Bind once instead of growing the table inside the loop.
all_markers <- do.call(rbind, markers_per_dataset)

In [None]:
all_markers  %>% arrange(p_val_adj)  %>% dplyr::filter(dataset == "cd4_l1_full_filt" & test_type == "Dia T0 vs Ctrl T0")

In [None]:
dir.create("../tables/DESeq_markers/")

In [None]:
write.csv(all_markers  %>% arrange(p_val_adj), "../tables/DESeq_markers/cd4_deseq.csv")

In [None]:
# Average RNA expression per Patient_Time group, restricted to the four listed
# experiments; returned as a list of matrices (not a Seurat object).
avgexp <- AverageExpression(
  subset(cd4_l1_full_filt, Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")),
  return.seurat = FALSE,
  group.by = "Patient_Time",
  assay = "RNA"
)



In [None]:
# Two side-by-side violin/beeswarm panels of the averaged expression of `gene`
# per Condition ("<Dia|Ctrl> <Time>"), read from the global `avgexp$RNA`.
#
# Left panel: all patients, test between the 1st and 2nd Condition levels.
# Right panel: patient 116 removed, paired test between the 2nd and 3rd levels.
# A patient is labelled "Dia" when its ID starts with "1", otherwise "Ctrl".
#
# @param gene Row name in avgexp$RNA.
# @return The two panels combined with `+`.
plot_gene <- function(gene) {
  expr_df <- as.data.frame(avgexp$RNA[which(rownames(avgexp$RNA) == gene), ]) %>%
    rownames_to_column("Patient_Time")
  colnames(expr_df)[2] <- "gene2"
  expr_df <- expr_df %>%
    separate(Patient_Time, into = c("Patient", "Time"), sep = " ", remove = F) %>%
    mutate(group = ifelse(substr(Patient, 1, 1) == "1", "Dia", "Ctrl")) %>%
    mutate(Condition = paste(group, Time))

  # Build one panel; only the data and the significance layer differ between
  # the two panels, the rest of the layer stack is shared.
  build_panel <- function(panel_data, test_layer) {
    ggplot(panel_data, aes(x = Condition, y = gene2)) +
      geom_violin(aes(fill = Condition), alpha = 0.3, scale = "width") +
      stat_summary(fun = "median",
                   geom = "crossbar",
                   width = 0.75,
                   color = "grey30") +
      geom_beeswarm(aes(fill = Condition), size = 3, cex = 3,
                    shape = 21, color = "black", corral = "random") +
      test_layer +
      scale_fill_manual(values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")) +
      scale_color_manual(values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")) +
      ylab("") +
      xlab("") +
      ylim(0, NA) +
      theme_classic() +
      theme(strip.background = element_blank(), panel.grid = element_blank()) +
      theme(axis.text = element_text(color = "black"),
            axis.text.x = element_text(angle = 90),
            axis.line = element_line(color = "black", size = 0.5),
            axis.ticks.x = element_blank()) +
      ggtitle(gene) + ggtheme()
  }

  panel_all <- build_panel(
    expr_df,
    stat_compare_means(label = "p.format", comparisons = list(c(1, 2)))
  )

  panel_without_116 <- build_panel(
    expr_df %>% dplyr::filter(Patient != 116),
    stat_compare_means(label = "p.format", comparisons = list(c(2, 3)), paired = TRUE)
  )

  panel_all + panel_without_116
}

In [None]:
plot_gene("CCL4")

In [None]:
plot_gene("ASCL2")

In [None]:
plot_gene("NFKBID")

## DE genes with correction for sex-related genes

In [None]:
# Markers that differ between the `Sex` groups of one dataset, for the RNA and
# CollecTRI assays.
#
# Reads the global `dataset_list` (named list of Seurat objects).
#
# @param i Index into `dataset_list`.
# @return Data frame of FindAllMarkers() rows from both assays with the extra
#   columns `source` ("Sex_RNA" / "Sex_collecTRI"), `test_type` ("Sex") and
#   `dataset`. A zero-row data frame is returned when neither assay yields a
#   marker, so bind_rows() simply skips it.
Create_Sex_Markers_DataFrame <- function(i) {

  ds <- dataset_list[[i]]

  # Markers of one assay, tagged with `source_label`; NULL when nothing is
  # found. Assigning a column to an empty FindAllMarkers() result would raise
  # an error, so the empty case is handled before tagging.
  find_sex_markers <- function(obj, assay, source_label) {
    DefaultAssay(obj) <- assay
    Idents(obj) <- obj$Sex
    found <- FindAllMarkers(obj, only.pos = TRUE)
    if (nrow(found) == 0) {
      return(NULL)
    }
    found$source <- source_label
    rownames(found) <- NULL
    found
  }

  ## scRNAseq - RNA
  markers_sc <- find_sex_markers(ds, "RNA", "Sex_RNA")
  print(paste("DE RNA: ", NROW(markers_sc)))

  ## scRNAseq - collecTRI
  markers_sc2 <- find_sex_markers(ds, "CollecTRI", "Sex_collecTRI")
  print(paste("DE CollecTRI: ", NROW(markers_sc2)))

  # rbind() ignores NULL inputs and returns NULL when both are NULL.
  markers2 <- rbind(markers_sc, markers_sc2)
  if (is.null(markers2)) {
    print("..")
    return(data.frame())
  }

  markers2$test_type <- "Sex"
  print("..")
  markers2$dataset <- names(dataset_list)[i]
  markers2
}


In [None]:

    
# Sex markers for every dataset, stacked into one table.
# seq_along() yields no iterations for an empty list (1:length() would yield 1, 0).
mrk <- map(.x = seq_along(dataset_list), Create_Sex_Markers_DataFrame)

mrk <- bind_rows(mrk)


In [None]:
mrk

In [None]:
dir.create("../tables/de_genes")

write.csv(all_markers, "../tables/de_genes/240319_markers_full_cd4_with_collecTRI.csv", row.names = F)

In [None]:
write.csv(mrk, "../tables/de_genes/240319_markers_full_cd4_sex.csv", row.names = F)

In [None]:
# Create filtered marker list without Sex genes.
# For each dataset, drop from `all_markers` every gene that `mrk` lists for the
# same dataset, then stack the per-dataset tables.
# NOTE(review): `mrk` is reassigned in several cells; this step expects it to
# hold the output of Create_Sex_Markers_DataFrame().
sex_filtered_tables <- vector("list", length(dataset_list))

for (i in seq_along(dataset_list)) {

  filt_df <- all_markers %>% dplyr::filter(dataset == names(dataset_list)[i])
  genes_to_remove <- mrk %>%
    dplyr::filter(dataset == names(dataset_list)[i]) %>%
    pull(gene)

  sex_filtered_tables[[i]] <- dplyr::filter(filt_df, !(gene %in% genes_to_remove))
}

# Bind once instead of growing the table inside the loop.
all_markers_without_sex <- do.call(rbind, sex_filtered_tables)

In [None]:
nrow(all_markers)

In [None]:
nrow(all_markers_without_sex)

In [None]:
write.csv(all_markers_without_sex, "../tables/de_genes/240319_cd4_all_markers_without_sex.csv", row.names = F)

In [None]:
all_markers_without_sex$dataset %>% table

## DE genes in SNP variant

In [None]:
snp_meta  <- read_csv("../data/snp_meta_our_paitents.csv")

In [None]:
snp_meta$`...1`  <- NULL

In [None]:
snp_meta

In [None]:
snp_meta$PTPN22_rs2476601 %>% table
snp_meta$IFIH1_rs1990760 %>% table
snp_meta$CD226_rs763361 %>% table
snp_meta$CD69_rs4763879 %>% table
snp_meta$TYK2_rs2304256 %>% table
snp_meta$UBASH3A_rs876498 %>% table

In [None]:
snp_meta  <- snp_meta  %>% dplyr::select(Patient_ID, PTPN22_rs2476601, IFIH1_rs1990760,
                                         CD226_rs763361, CD69_rs4763879,
                                        TYK2_rs2304256, UBASH3A_rs876498)

In [None]:
snp_meta$PTPN22_rs2476601   <- ifelse(snp_meta$PTPN22_rs2476601 %in% c("AA","AG","GG"), snp_meta$PTPN22_rs2476601, NA_character_)

In [None]:
snp_meta$TYK2_rs2304256 %>% table

In [None]:
snp_meta$TYK2_rs2304256   <- ifelse(snp_meta$TYK2_rs2304256 %in% c("AA","AC","CC"), snp_meta$TYK2_rs2304256, NA_character_)

In [None]:
snp_meta$CD226_rs763361  <- ifelse(snp_meta$CD226_rs763361 %in% c("CC","CT","TT"), snp_meta$CD226_rs763361, NA_character_)

In [None]:
snp_meta$CD69_rs4763879  <- ifelse(snp_meta$CD69_rs4763879 %in% c("AA","AG","GG"), snp_meta$CD69_rs4763879, NA_character_)

In [None]:
snp_meta$UBASH3A_rs876498 %>% table

In [None]:
snp_meta$UBASH3A_rs876498  <- ifelse(snp_meta$UBASH3A_rs876498 %in% c("AA","AG","GG"), snp_meta$UBASH3A_rs876498, NA_character_)

In [None]:
snp_meta$PTPN22_rs2476601 %>% table
snp_meta$IFIH1_rs1990760 %>% table
snp_meta$CD226_rs763361 %>% table
snp_meta$CD69_rs4763879 %>% table
snp_meta$TYK2_rs2304256 %>% table
snp_meta$UBASH3A_rs876498 %>% table

In [None]:
snp_meta

In [None]:
# Markers between the genotype groups of every SNP column in `snp_meta`, for
# the RNA and CollecTRI assays of one dataset.
#
# Reads the globals `dataset_list` (named list of Seurat objects) and
# `snp_meta` (first column Patient_ID, one further column per SNP).
#
# @param i Index into `dataset_list`.
# @return Data frame of FindAllMarkers(only.pos = FALSE) rows for all SNPs and
#   both assays with the extra columns `source` ("<SNP column> RNA" /
#   "<SNP column> CollecTRI"), `test_type` ("SNP") and `dataset`. A zero-row
#   data frame is returned when nothing is found, so bind_rows() skips it.
Create_SNP_Markers_DataFrame <- function(i) {

  ds <- dataset_list[[i]]

  # Markers of one assay grouped by the `variant` column, tagged with
  # `source_label`; NULL when nothing is found. Assigning a column to an empty
  # FindAllMarkers() result would raise an error, so the empty case is handled
  # before tagging.
  find_variant_markers <- function(obj, assay, source_label) {
    DefaultAssay(obj) <- assay
    Idents(obj) <- obj$variant
    found <- FindAllMarkers(obj, only.pos = FALSE)
    if (nrow(found) == 0) {
      return(NULL)
    }
    found$source <- source_label
    rownames(found) <- NULL
    found
  }

  # Every column after the first; empty when snp_meta has a single column.
  snp_columns <- seq_len(ncol(snp_meta))[-1]
  markers_per_snp <- vector("list", length(snp_columns))

  for (k in seq_along(snp_columns)) {
    j <- snp_columns[k]
    snp_name <- colnames(snp_meta)[j]

    # Patient_ID plus this SNP's genotype, renamed to `variant`. Positional
    # indexing avoids any ambiguity with a column that happens to be named "j".
    one_snp_meta <- snp_meta[, c(1, j)]
    one_snp_meta$Patient_ID <- as.character(one_snp_meta$Patient_ID)
    colnames(one_snp_meta)[2] <- "variant"

    ds@meta.data <- left_join(ds@meta.data, one_snp_meta)
    # left_join() drops the row names; restore the cell names.
    rownames(ds@meta.data) <- colnames(ds)

    ## scRNAseq - RNA
    markers_sc <- find_variant_markers(ds, "RNA", paste(snp_name, "RNA"))
    print(paste("DE RNA: ", NROW(markers_sc)))

    ## scRNAseq - collecTRI
    markers_sc2 <- find_variant_markers(ds, "CollecTRI", paste(snp_name, "CollecTRI"))
    print(paste("DE CollecTRI: ", NROW(markers_sc2)))

    # rbind() ignores NULL inputs; the slot is left empty when both are NULL.
    snp_markers <- rbind(markers_sc, markers_sc2)
    if (!is.null(snp_markers)) {
      markers_per_snp[[k]] <- snp_markers
    }

    # Remove the genotype column so the next SNP joins onto clean metadata.
    ds$variant <- NULL
  }

  markers3 <- do.call(rbind, markers_per_snp)
  if (is.null(markers3)) {
    print("..")
    return(data.frame())
  }

  markers3$test_type <- "SNP"
  print("..")
  markers3$dataset <- names(dataset_list)[i]
  markers3
}


In [None]:
dataset_list  <- cd4_l3_list

In [None]:
   
# SNP genotype markers for every dataset, stacked into one table.
# seq_along() yields no iterations for an empty list (1:length() would yield 1, 0).
mrk <- map(.x = seq_along(dataset_list), Create_SNP_Markers_DataFrame)

mrk <- bind_rows(mrk)


In [None]:
mrk

In [None]:
write.csv(mrk, "../tables/de_genes/240315_snp_cd4.csv")

In [None]:
mrk  %>% dplyr::filter(gene %in% c("PTPN22","IFIH1","CD69","CD226","UBASH3A","TYK2"))

In [None]:
mrk  %>% group_by(cluster, source)  %>% tally  %>% arrange(source)

In [None]:
mrk  %>% filter(grepl(source, pattern = "RNA")) %>% group_by(gene)  %>% tally  %>% arrange(desc(n)) 

## Analysis of DE genes

In [None]:
library(EnsDb.Hsapiens.v86)

In [None]:
geneIDs1 <- ensembldb::select(EnsDb.Hsapiens.v86, keys= all_markers_without_sex$gene, 
                              keytype = "SYMBOL", columns = c("SYMBOL","ENTREZID"))

In [None]:
geneIDs1

In [None]:
colnames(geneIDs1)  <- c("gene", "entrezid")

In [None]:
all_markers_without_sex

## Ctrl vs Dia T0

### RNA

In [None]:
# Genes reported for the Dia T0 / Ctrl T0 contrast in the RNA assay, most
# frequently reported first.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
genes_Dia_vs_Ctrl_T0 <- all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  source == "scRNAseq_RNA") %>%
  group_by(gene) %>%
  tally() %>%
  arrange(desc(n)) %>%
  pull(gene)

In [None]:
genes_Dia_vs_Ctrl_T0

In [None]:
genes_Dia_vs_Ctrl_T0  %>% length

In [None]:
# Gene order for the x axis: unique genes of the Dia T0 / Ctrl T0 RNA markers,
# sorted by the cluster they are reported for.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
lvl <- all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  source == "scRNAseq_RNA") %>%
  group_by(gene, cluster) %>%
  arrange(cluster) %>%
  pull(gene) %>%
  unique

In [None]:
# Number of marker rows per gene for the Dia T0 / Ctrl T0 RNA markers.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  source == "scRNAseq_RNA") %>%
  group_by(gene) %>%
  tally %>%
  arrange(desc(n))

In [None]:
# Genes with more than one marker row among the Dia T0 / Ctrl T0 RNA markers.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
is.more.than.one.cluster <- all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  source == "scRNAseq_RNA") %>%
  group_by(gene) %>%
  tally %>%
  arrange(desc(n)) %>%
  dplyr::filter(n > 1) %>%
  pull(gene)

In [None]:
is.more.than.one.cluster

In [None]:
options(repr.plot.height = 4, repr.plot.width = 20)

# Dot plot of the Dia T0 / Ctrl T0 RNA markers: gene (ordered by `lvl`) against
# dataset, dot size = -log10(adjusted p-value), colour = cluster and source.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  gene %in% genes_Dia_vs_Ctrl_T0 &
                  source == "scRNAseq_RNA") %>%
  mutate(cluster_source = paste(cluster, source)) %>%
  ggplot(aes(factor(gene, levels = lvl), dataset)) +
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) +
  theme_bw() + scale_size_continuous(range = c(1, 4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.height = 4, repr.plot.width = 20)

# Same dot plot as for all Dia T0 / Ctrl T0 RNA markers, restricted to genes
# listed in `is.more.than.one.cluster`.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  gene %in% genes_Dia_vs_Ctrl_T0 &
                  source == "scRNAseq_RNA" & gene %in% is.more.than.one.cluster) %>%
  mutate(cluster_source = paste(cluster, source)) %>%
  ggplot(aes(factor(gene, levels = lvl), dataset)) +
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) +
  theme_bw() + scale_size_continuous(range = c(1, 4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") +
  theme(axis.text.x = element_text(angle = 90))

### CollecTRI

In [None]:
# Features reported for the Dia T0 / Ctrl T0 contrast in the CollecTRI assay,
# most frequently reported first.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
genes_Dia_vs_Ctrl_T0 <- all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  source == "scRNAseq_collecTRI") %>%
  group_by(gene) %>%
  tally() %>%
  arrange(desc(n)) %>%
  pull(gene)

In [None]:
genes_Dia_vs_Ctrl_T0

In [None]:
# Feature order for the x axis: unique `gene` values of the Dia T0 / Ctrl T0
# CollecTRI markers, sorted by the cluster they are reported for.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
lvl <- all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  source == "scRNAseq_collecTRI") %>%
  group_by(gene, cluster) %>%
  arrange(cluster) %>%
  pull(gene) %>%
  unique

In [None]:
# Number of marker rows per feature for the Dia T0 / Ctrl T0 CollecTRI markers.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  source == "scRNAseq_collecTRI") %>%
  group_by(gene) %>%
  tally %>%
  arrange(desc(n))

In [None]:
# Features with more than one marker row among the Dia T0 / Ctrl T0 CollecTRI
# markers.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
is.more.than.one.cluster <- all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  source == "scRNAseq_collecTRI") %>%
  group_by(gene) %>%
  tally %>%
  arrange(desc(n)) %>%
  dplyr::filter(n > 1) %>%
  pull(gene)

In [None]:
is.more.than.one.cluster

In [None]:
options(repr.plot.height = 4, repr.plot.width = 20)

# Dot plot of the Dia T0 / Ctrl T0 CollecTRI markers: feature (ordered by
# `lvl`) against dataset, dot size = -log10(adjusted p-value), colour = cluster
# and source.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  gene %in% genes_Dia_vs_Ctrl_T0 &
                  source == "scRNAseq_collecTRI") %>%
  mutate(cluster_source = paste(cluster, source)) %>%
  ggplot(aes(factor(gene, levels = lvl), dataset)) +
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) +
  theme_bw() + scale_size_continuous(range = c(1, 4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") +
  theme(axis.text.x = element_text(angle = 90))

In [None]:
options(repr.plot.height = 4, repr.plot.width = 30)

# Same dot plot as for all Dia T0 / Ctrl T0 CollecTRI markers, restricted to
# features listed in `is.more.than.one.cluster`.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
all_markers_without_sex %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  gene %in% genes_Dia_vs_Ctrl_T0 &
                  source == "scRNAseq_collecTRI" & gene %in% is.more.than.one.cluster) %>%
  mutate(cluster_source = paste(cluster, source)) %>%
  ggplot(aes(factor(gene, levels = lvl), dataset)) +
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) +
  theme_bw() + scale_size_continuous(range = c(1, 4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") +
  theme(axis.text.x = element_text(angle = 90))

### GSEA

In [None]:
# Inputs for GSEA on the Dia T0 / Ctrl T0 contrast: the Entrez IDs and, in the
# same row order, the matching avg_log2FC values. Both come from the same
# filtered, grouped and sorted table, so the two vectors line up.
# Create_Markers_DataFrame() labels this contrast "Dia T0 vs Ctrl T0"; the
# reversed spelling is accepted too in case a loaded table uses it.
# NOTE(review): `all_markers3` is not defined in the visible part of this
# notebook; it needs an `entrezid` column.
gsea_dia_t0_vs_ctrl_t0 <- all_markers3 %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  !is.na(entrezid)) %>%
  group_by(entrezid, avg_log2FC) %>%
  tally() %>%
  arrange(desc(n)) %>%
  pull(entrezid)

gsea_dia_t0_vs_ctrl_t0_value <- all_markers3 %>%
  dplyr::filter(test_type %in% c("Dia T0 vs Ctrl T0", "Ctrl T0 vs Dia T0") &
                  !is.na(entrezid)) %>%
  group_by(entrezid, avg_log2FC) %>%
  tally() %>%
  arrange(desc(n)) %>%
  pull(avg_log2FC)

In [None]:
names(gsea_dia_t0_vs_ctrl_t0_value)  <- gsea_dia_t0_vs_ctrl_t0

In [None]:
gsea_dia_t0_vs_ctrl_t0_value  <- gsea_dia_t0_vs_ctrl_t0_value[rev(order(gsea_dia_t0_vs_ctrl_t0_value))]

In [None]:
gsea_dia_t0_vs_ctrl_t0_value

In [None]:
library(msigdbr)

In [None]:
# Gene-set table (gs_name, entrez_gene) from MSigDB for Homo sapiens; it is
# passed as TERM2GENE to GSEA() below.
# Note: despite the variable name `C3_t2g`, the collection requested is "C5".
C3_t2g <- msigdbr(species = "Homo sapiens", category = "C5") %>% 
  dplyr::select(gs_name, entrez_gene)
head(C3_t2g)

In [None]:
em2 <- GSEA(gsea_dia_t0_vs_ctrl_t0_value, TERM2GENE = C3_t2g)
head(em2)

In [None]:
em2 

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6)
dotplot(em2, showCategory=30) + ggtitle("CD4 Dia T0 vs Ctrl T0")

### Enrichment

In [None]:
library(clusterProfiler)

In [None]:
library("org.Hs.eg.db")

In [None]:
up_dia_t0_vs_ctrl_t0  <- all_markers_without_sex  %>% 
                        dplyr::filter(test_type == "Ctrl T0 vs Dia T0" & cluster == "Dia T0")  %>% 
                        group_by(gene)  %>% 
                        tally()  %>% 
                        arrange(desc(n))  %>% 
                        pull(gene)
down_dia_t0_vs_ctrl_t0  <- all_markers_without_sex  %>% 
                        dplyr::filter(test_type == "Ctrl T0 vs Dia T0" & cluster == "Ctrl T0")  %>% 
                        group_by(gene)  %>% 
                        tally()  %>% 
                        arrange(desc(n))  %>% 
                        pull(gene)

In [None]:
up_dia_t0_vs_ctrl_t0

In [None]:
ego2 <- enrichGO(gene         = up_dia_t0_vs_ctrl_t0,
                OrgDb         = org.Hs.eg.db,
                keyType       = 'SYMBOL',
                ont           = "ALL",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05)
head(ego2, 3)         

options(repr.plot.width = 20, repr.plot.height = 5)

edox2 <- pairwise_termsim(ego2)
p1 <- treeplot(edox2)
p2 <- treeplot(edox2, hclust_method = "average")
p1 + ggtitle("Enriched in Dia")

In [None]:
ego2  %>% as.data.frame()

In [None]:
down_dia_t0_vs_ctrl_t0

In [None]:
ego2 <- enrichGO(gene         = down_dia_t0_vs_ctrl_t0,
                OrgDb         = org.Hs.eg.db,
                keyType       = 'SYMBOL',
                ont           = "ALL",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05)
head(ego2, 3)         

options(repr.plot.width = 20, repr.plot.height = 5)

edox2 <- pairwise_termsim(ego2)
p1 <- treeplot(edox2)
p2 <- treeplot(edox2, hclust_method = "average")
p1 + ggtitle("Enriched in Ctrl")

In [None]:
ego2  %>% as.data.frame()

In [None]:
edox <- setReadable(ego2, 'org.Hs.eg.db', 'ENTREZID')

p3 <- cnetplot(edox, circular = TRUE, colorEdge = TRUE) 
p3

In [None]:
as.data.frame(edox )

## Ctrl vs Dia T1

In [None]:
genes_Dia_vs_Ctrl_T1  <- all_markers  %>% dplyr::filter(test_type == "Ctrl T0 vs Dia T1")  %>% 
group_by(gene)  %>% tally()  %>% arrange(desc(n))  %>% pull(gene)

In [None]:
genes_Dia_vs_Ctrl_T1

In [None]:
lvl  <- all_markers  %>% dplyr::filter(test_type == "Ctrl T0 vs Dia T1")  %>% 
group_by(gene, cluster)  %>% arrange(cluster)  %>% pull(gene)  %>% unique

In [None]:
options(repr.plot.height = 4, repr.plot.width = 20)
all_markers  %>% 
dplyr::filter(test_type == "Ctrl T0 vs Dia T1" & gene %in% genes_Dia_vs_Ctrl_T1  & source == "scRNAseq")  %>%  
mutate(cluster_source = paste(cluster, source))  %>% 
ggplot(aes(factor(gene, levels = lvl), dataset)) + 
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) + 
  theme_bw()  + scale_size_continuous(range=c(1,4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") + 
 theme(axis.text.x = element_text(angle = 90))

In [None]:
up_dia_t1_vs_ctrl_t0  <- all_markers  %>% 
                        dplyr::filter(test_type == "Ctrl T0 vs Dia T1" & cluster == "Dia T1")  %>% 
                        group_by(gene)  %>% 
                        tally()  %>% 
                        arrange(desc(n))  %>% 
                        pull(gene)
down_dia_t1_vs_ctrl_t0  <- all_markers  %>% 
                        dplyr::filter(test_type == "Ctrl T0 vs Dia T1" & cluster == "Ctrl T0")  %>% 
                        group_by(gene)  %>% 
                        tally()  %>% 
                        arrange(desc(n))  %>% 
                        pull(gene)

In [None]:
up_dia_t1_vs_ctrl_t0

In [None]:
# Show the genes reported for cluster "Ctrl T0" in the "Ctrl T0 vs Dia T1" test
# (the previous object name carried a stray "salmo" suffix and did not exist).
down_dia_t1_vs_ctrl_t0

### Dia T0 vs Dia T1

In [None]:
genes_Dia_vs_Dia_T1  <- all_markers  %>% dplyr::filter(test_type == "Dia T0 vs Dia T1")  %>% 
group_by(gene)  %>% tally()  %>% arrange(desc(n))  %>% pull(gene)

In [None]:
genes_Dia_vs_Dia_T1

In [None]:
lvl  <- all_markers  %>% dplyr::filter(test_type == "Dia T0 vs Dia T1")  %>% 
group_by(gene, cluster)  %>% arrange(cluster)  %>% pull(gene)  %>% unique

In [None]:
options(repr.plot.height = 4, repr.plot.width = 20)
all_markers  %>% 
dplyr::filter(test_type == "Dia T0 vs Dia T1" & gene %in% genes_Dia_vs_Dia_T1 & source == "scRNAseq")  %>%  
mutate(cluster_source = paste(cluster, source))  %>% 
ggplot(aes(factor(gene, levels = lvl), dataset)) + 
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) + 
  theme_bw()  + scale_size_continuous(range=c(1,4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") + 
theme(axis.text.x = element_text(angle = 90))

In [None]:
up_dia_t0_vs_dia_t1  <- all_markers  %>% 
                        dplyr::filter(test_type == "Dia T0 vs Dia T1" & cluster == "Dia T0")  %>% 
                        group_by(gene)  %>% 
                        tally()  %>% 
                        arrange(desc(n))  %>% 
                        pull(gene)
down_dia_t0_vs_dia_t1  <- all_markers  %>% 
                        dplyr::filter(test_type == "Dia T0 vs Dia T1" & cluster == "Dia T1")  %>% 
                        group_by(gene)  %>% 
                        tally()  %>% 
                        arrange(desc(n))  %>% 
                        pull(gene)

## Table with markers

In [None]:
# Curated gene panel used as `features` for the fold-change table below.
# unique() drops repeated entries ('TNFSF10' is listed twice); duplicated
# feature names are not valid as data.frame row names and would only add a
# redundant row to the heatmap.
markers  <- unique(c('GADD45B',
              'DUSP1', 'CD69', 'FOS', 
              'IFI6', 'ISG15', 'IFI44L', 'LY6E', 
              'FKBP5','MX1', 'TNFAIP3', 'BTN3A2',
              'DUSP2', 
               'GIMAP7', 'IER2', 'IFIT3', 'IFITM1', 'IRF1', 'JUN', 
               'OAS1', 'PDE4B', 'PTPRJ', 
               'SOCS3', 
              'STAT1', 'GIMAP4',
               'BCL11B', 'BCL2', 'BIN3', 'BTG1', 
               'CALR', 'CCR10', 'CD53', 'CD6', 'CENPK', 
              'CX3CR1', 'CXCR4', 
              'TNFSF10', 'CCL4', 'CCL5', 
              'DUSP5',
               'ENTPD6',  'FOSB',  'GZMB', 'ID2', 'ID3', 'NKG7', 
               'HLA-DPA1', 
              'HLA-DRB1','IFI44', 'IFIT1', 'IFIT2', 'IRF7', 'ITPKB', 
              'JUNB',  'LEF1', 'LGALS1', 
              'TNFSF10', 'NFKBIA', 
              'NIBAN1',  'PDE4D', 'CD27','GNLY', 'GZMH','TNF','TNFAIP8L2', 'TRAF3IP3', 
              'IL16', 'IL7R','ORAI1', 
              'PIK3R1', 'PRSS2', 'PRSS23',  'PTPRCAP', 'S1PR1', 'TCF7', 'TIGIT', 'TMEM256', 'TNIP1', 'TSC22D3'))

In [None]:
# Candidate gene panel (TAB1 ... RIPK1), restricted to the genes that are
# present as row names of the RNA assay of cd4_l1_full_filt. The panel is
# written once; intersect() keeps its order and drops anything not found.
markers  <- intersect(
    c('TAB1', 'CHUK', 'TAB2', 'TAB3', 'IKBKB', 'NFKB1', 'NFKBIA', 'RELA', 'MAP3K7',
      'TNF', 'TNFRSF1A', 'TRAF2', 'TRAF5', 'IKBKG', 'TRADD', 'RIPK1'),
    rownames(cd4_l1_full_filt@assays$RNA)
)

In [None]:
all_markers  %>%
    mutate(avg_log2FC = ifelse(cluster == "Ctrl T0",-1*avg_log2FC, avg_log2FC))  %>% 
    dplyr::filter(source == "scRNAseq" & test_type != "Cpept_HI_T1 vs Cpept_LO_T1" &
                 test_type != "Cpept_HI_T0 vs Cpept_LO_T0")  %>% 
pivot_wider(names_from = cluster, values_from = avg_log2FC, values_fn = mean)

In [None]:
cd4_l1_full_filt

In [None]:
Idents(cd4_l1_full_filt)  <- cd4_l1_full_filt$Condition

In [None]:
fc1  <- FoldChange(cd4_l1_full_filt, assay = "RNA",
                   `ident.1` = "Dia T0", `ident.2` = "Ctrl T0", features = markers)

In [None]:
fc1

In [None]:
fc2  <- FoldChange(cd4_l1_full_filt, assay = "RNA",
                   `ident.1` = "Dia T1", `ident.2` = "Ctrl T0", features = markers)

In [None]:
fc2

In [None]:
fc3  <- FoldChange(cd4_l1_full_filt, assay = "RNA",
                   `ident.1` = "Dia T0", `ident.2` = "Dia T1", features = markers)

In [None]:
fc3

In [None]:
fc4  <- FoldChange(cd4_l1_full_filt, assay = "RNA",
                   `ident.1` = "Dia T0", `ident.2` = "Pre-Dia T0", features = markers)

fc4

In [None]:
fc5  <- FoldChange(cd4_l1_full_filt, assay = "RNA",
                   `ident.1` = "Pre-Dia T0", `ident.2` = "Ctrl T0", features = markers)

fc5

In [None]:
fc_all  <- data.frame(DiaT0_Ctrl = fc1$avg_log2FC, 
                      DiaT1_Ctrl = fc2$avg_log2FC,
                      DiaT0_DiaT1 = fc3$avg_log2FC,
                     DiaT0_PreDia = fc4$avg_log2FC,
                     PreDia_Ctrl = fc5$avg_log2FC
                     )

In [None]:
rownames(fc_all)  <- rownames(fc1)

In [None]:
fc_all  %>% as.matrix

In [None]:
library(pheatmap)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 15)
pheatmap(fc_all  %>% as.matrix, main = "", scale = "none", cluster_cols = F, cluster_rows = T,
        color=colorRampPalette(c("dodgerblue", "grey95", "red"))(50), 
         border_color = "white",
                  fontsize = 9)

#NFkb

In [None]:
options(repr.plot.width = 4, repr.plot.height = 9)
pheatmap(fc_all  %>% as.matrix, main = "", scale = "none", cluster_cols = F, cluster_rows = T,
        color=colorRampPalette(c("dodgerblue", "grey95", "red"))(50), 
         border_color = "white",
                  fontsize = 9)

### Dia Cpep T0

In [None]:
all_markers$test_type  %>% table

In [None]:
genes_Dia_cpept_T0  <- all_markers  %>% dplyr::filter(test_type == "Cpept_HI_T0 vs Cpept_LO_T0")  %>% 
group_by(gene)  %>% tally()  %>% arrange(desc(n))  %>% pull(gene)

In [None]:
genes_Dia_cpept_T0

In [None]:
lvl  <- all_markers  %>% dplyr::filter(test_type == "Cpept_HI_T0 vs Cpept_LO_T0")  %>% 
group_by(gene, cluster)  %>% arrange(cluster)  %>% pull(gene)  %>% unique

In [None]:
options(repr.plot.height = 40, repr.plot.width = 10)
all_markers  %>% 
dplyr::filter(test_type == "Cpept_HI_T0 vs Cpept_LO_T0" & gene %in% genes_Dia_cpept_T0)  %>%  
mutate(cluster_source = paste(cluster, source))  %>% 
ggplot(aes(factor(gene, levels = lvl), dataset)) + 
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) + 
  theme_bw()  + scale_size_continuous(range=c(1,4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") + 
coord_flip() + theme(axis.text.x = element_text(angle = 90))

In [None]:
up_dia_cpept_lo_vs_hi_t0  <- all_markers  %>% 
                        dplyr::filter(test_type == "Cpept_HI_T0 vs Cpept_LO_T0" & cluster == "Cpept_LO_T0")  %>% 
                        group_by(gene)  %>% 
                        tally()  %>% 
                        arrange(desc(n))  %>% 
                        pull(gene)
down_dia_cpept_lo_vs_hi_t0  <- all_markers  %>% 
                        dplyr::filter(test_type == "Cpept_HI_T0 vs Cpept_LO_T0" & cluster == "Cpept_HI_T0")  %>% 
                        group_by(gene)  %>% 
                        tally()  %>% 
                        arrange(desc(n))  %>% 
                        pull(gene)

### Dia Cpep T1

In [None]:
all_markers$test_type  %>% table

In [None]:
genes_Dia_cpept_T1  <- all_markers  %>% dplyr::filter(test_type == "Cpept_HI_T1 vs Cpept_LO_T1")  %>% 
group_by(gene)  %>% tally()  %>% arrange(desc(n))  %>% pull(gene)

In [None]:
genes_Dia_cpept_T1

In [None]:
lvl  <- all_markers  %>% dplyr::filter(test_type == "Cpept_HI_T1 vs Cpept_LO_T1")  %>% 
group_by(gene, cluster)  %>% arrange(cluster)  %>% pull(gene)  %>% unique

In [None]:
options(repr.plot.height = 40, repr.plot.width = 10)
all_markers  %>% 
dplyr::filter(test_type == "Cpept_HI_T1 vs Cpept_LO_T1" & gene %in% genes_Dia_cpept_T1)  %>%  
mutate(cluster_source = paste(cluster, source))  %>% 
ggplot(aes(factor(gene, levels = lvl), dataset)) + 
  geom_point(aes(size = -log(p_val_adj, base = 10), colour = cluster_source)) + 
  theme_bw()  + scale_size_continuous(range=c(1,4)) +
  #scale_colour_gradient2(low = "lightskyblue", mid = "lightsteelblue2", high = "salmon") + 
coord_flip() + theme(axis.text.x = element_text(angle = 90))

In [None]:
up_dia_cpept_lo_vs_hi_T1  <- all_markers  %>% 
                        dplyr::filter(test_type == "Cpept_HI_T1 vs Cpept_LO_T1" & cluster == "Cpept_LO_T1")  %>% 
                        group_by(gene)  %>% 
                        tally()  %>% 
                        arrange(desc(n))  %>% 
                        pull(gene)
down_dia_cpept_lo_vs_hi_T1  <- all_markers  %>% 
                        dplyr::filter(test_type == "Cpept_HI_T1 vs Cpept_LO_T1" & cluster == "Cpept_HI_T1")  %>% 
                        group_by(gene)  %>% 
                        tally()  %>% 
                        arrange(desc(n))  %>% 
                        pull(gene)

## Ctrl T0 vs Dia T0 pathways

In [None]:
all_markers$dataset  %>% table

In [None]:
all_mrk1  <- all_markers  %>% dplyr::filter(test_type == "Ctrl T0 vs Dia T0")

In [None]:
all_mrk1

In [None]:
dia_up  <- all_mrk1  %>% dplyr::filter(cluster == "Dia T0")  %>% pull(gene)  %>% unique

In [None]:
dia_up

In [None]:
library(ReactomePA)
library(clusterProfiler)
library(org.Hs.eg.db)

In [None]:

ego2 <- enrichGO(gene         = dia_up,
                OrgDb         = org.Hs.eg.db,
                keyType       = 'SYMBOL',
                ont           = "ALL",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05)
head(ego2, 3)         

In [None]:
dia_down  <- all_mrk1  %>% dplyr::filter(cluster == "Ctrl T0")  %>% pull(gene)  %>% unique

dia_down

library(ReactomePA)
library(clusterProfiler)
library(org.Hs.eg.db)


ego2 <- enrichGO(gene         = dia_down,
                OrgDb         = org.Hs.eg.db,
                keyType       = 'SYMBOL',
                ont           = "ALL",
                pAdjustMethod = "BH",
                pvalueCutoff  = 0.01,
                qvalueCutoff  = 0.05)
head(ego2, 3)         

In [None]:
library(DOSE)
data(geneList)
de <- names(geneList)[abs(geneList) > 2]

edo <- enrichDGN(de)

# Populations - correlations with C-peptide

In [None]:
fast  <- cd4_l1_full_filt@meta.data  %>% dplyr::select(Sample_ID, fasting_cpept_T1)  %>% unique

In [None]:
fast

In [None]:
mtx3

In [None]:
populations  <- mtx3  %>% 
 left_join(fast)

In [None]:
colnames(populations)

In [None]:
populations

In [None]:
# Correlate one column of the global `populations` table with fasting C-peptide.
#
# i          column index of the population to test
# target_col column index holding fasting_cpept_T1 (default 57, as before)
# n_tests    number of tests used for the Bonferroni correction (default 46)
#
# Returns a one-row data.frame: population (column name), cor (estimate),
# pval, and padj (pval * n_tests, capped at 1).
#
# The previous `filter(!is.na(57))` tested the constant 57, which is never NA,
# so no row was ever removed; rows are now dropped where the C-peptide value
# itself is missing. cor.test() ignores incomplete pairs on its own, so the
# estimates are unchanged -- the filter now simply does what it says.
calc_correlation  <- function(i, target_col = 57, n_tests = 46){
    df  <- data.frame(value = populations[[i]],
                      fasting_cpept_T1 = populations[[target_col]])
    df  <- df[!is.na(df$fasting_cpept_T1), , drop = FALSE]
    cor  <- cor.test(df$value, df$fasting_cpept_T1)
    res_df  <- data.frame(population = colnames(populations)[i], 
                          cor = cor$estimate, 
                          pval = cor$p.value, 
                          # Bonferroni: multiply by the number of tests, cap at 1
                          padj  = min(1, cor$p.value * n_tests))
    return(res_df)
}

In [None]:
test  <- future_map(11:56, calc_correlation)

In [None]:
test2  <- bind_rows(test)

In [None]:
test2  %>% arrange(pval)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 4)
# Scatter plot with linear fit of fasting C-peptide (column 57) against each of
# the populations with the smallest correlation p-values.
# head(10) replaces [1:10]: indexing past the end would yield NA names and
# break the column lookup when fewer than 10 populations were tested.
for(i in (test2  %>% arrange(pval)  %>% pull(population)  %>% head(10))){
    # keep the population column plus C-peptide, dropping samples without it
    df2 <- populations  %>% dplyr::select(which(colnames(populations)==i),fasting_cpept_T1 = 57)  %>% 
     dplyr::filter(!is.na(fasting_cpept_T1))
    colnames(df2)  <- c("value", "fasting_cpept_T1")
    p  <- df2 %>%  ggplot(aes(x=value, y=fasting_cpept_T1)) +
  geom_point(shape = 16, size = 2) +
 geom_smooth(method=lm) + ggtitle(i) 
    print(p)
    }

In [None]:
# NOTE(review): `colnames(populations)==3` compares the column names with the
# number 3, so it only matches a column literally named "3". If column index 3
# (or a named population) was intended, this lookup presumably needs adjusting
# -- confirm before relying on df2.
df2 <- populations  %>% dplyr::select(which(colnames(populations)==3),fasting_cpept_T1 = 57) 

In [None]:
    colnames(df2)  <- c("value", "fasting_cpept_T1")

In [None]:
    p  <- df2 %>%  ggplot(aes(x=value, y=fasting_cpept_T1)) +
  geom_point(shape = 16, size = 2) +
 geom_smooth(method=lm) + ggtitle(i) 

# Populations - correlations with age

In [None]:
# Correlate one column of the global `populations` table with age.
# (Re-defines calc_correlation; the C-peptide version above is replaced.)
#
# i          column index of the population to test
# target_col column index holding age (default 6, as before)
# n_tests    number of tests used for the Bonferroni correction (default 49)
#
# Returns a one-row data.frame: population (column name), cor (estimate),
# pval, and padj (pval * n_tests, capped at 1).
#
# The previous `filter(!is.na(6))` tested the constant 6, which is never NA,
# so it removed nothing, and the unqualified filter() depended on which
# package masked it. Rows with a missing age are now dropped explicitly;
# cor.test() ignores incomplete pairs anyway, so the estimates are unchanged.
calc_correlation  <- function(i, target_col = 6, n_tests = 49){
    df  <- data.frame(value = populations[[i]],
                      age = populations[[target_col]])
    df  <- df[!is.na(df$age), , drop = FALSE]
    cor  <- cor.test(df$value, df$age)
    res_df  <- data.frame(population = colnames(populations)[i], 
                          cor = cor$estimate, 
                          pval = cor$p.value, 
                          # Bonferroni: multiply by the number of tests, cap at 1
                          padj  = min(1, cor$p.value * n_tests))
    return(res_df)
}

In [None]:
test  <- future_map(11:59, calc_correlation)

In [None]:
test2  <- bind_rows(test)

In [None]:
test2  %>% arrange(pval)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 4)
# Scatter plot with linear fit of age (column 6) against each of the
# populations with the smallest correlation p-values.
# head(10) replaces [1:10]: indexing past the end would yield NA names and
# break the column lookup when fewer than 10 populations were tested.
for(i in (test2  %>% arrange(pval)  %>% pull(population)  %>% head(10))){
    df2 <- populations  %>% dplyr::select(which(colnames(populations)==i),age = 6) 
    colnames(df2)  <- c("value", "age")
    p  <- df2 %>%  ggplot(aes(x=value, y=age)) +
  geom_point(shape = 16, size = 2) +
 geom_smooth(method=lm) + ggtitle(i) 
    print(p)
    }

# Kallionpaa 

In [None]:
kallion  <- readRDS("../../231106_VN_DiabetesV03/kalinopaa_filt_stacas.rds")

In [None]:
DimPlot(kallion)

In [None]:
kallion  <- FindNeighbors(kallion)

In [None]:
kallion  <- FindClusters(kallion, res = 0.4)

In [None]:
DimPlot(kallion, label = T)

In [None]:
FeaturePlot(kallion, features = "TGFB1", min.cutoff = 0)

In [None]:
kallion_treg  <- subset(kallion, seurat_clusters == 5)

In [None]:
DimPlot(kallion_treg, label = T)

In [None]:
kallion_treg <- SCTransform(kallion_treg)
kallion_treg <- RunPCA(kallion_treg)
kallion_treg <- RunUMAP(kallion_treg, dims = 1:10)
kallion_treg <- FindNeighbors(kallion_treg)

In [None]:
kallion_treg <- FindClusters(kallion_treg, resolution = 0.3)

In [None]:
DimPlot(kallion_treg, group.by = "source")

In [None]:
FeaturePlot(kallion_treg, features = "GZMK", min.cutoff = 0)

In [None]:
kallion_treg$Condition  <- substr(kallion_treg$Condition,1,4)

In [None]:
kallion_treg$Condition  %>%  table

In [None]:
Idents(kallion_treg)  <- kallion_treg$Condition

In [None]:
mrk  <- FindAllMarkers(kallion_treg)

In [None]:
mrk

In [None]:
mrk  %>% dplyr::filter(cluster == "Cont")

In [None]:
avgexp = AverageExpression(kallion_treg, features = mrk$gene, return.seurat = F, group.by = "source", assays = "RNA")

options(repr.plot.width = 25, repr.plot.height = 3.5)
pheatmap(t(avgexp$RNA), main = "", scale = "column", cluster_cols = T, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 9)

In [None]:
# Average RNA expression per `source` for a hand-picked gene list (upper-cased,
# then reversed), shown as a column-scaled heatmap with one row per source.
avgexp <- AverageExpression(
    kallion_treg,
    features = rev(str_to_upper(c(
        "TCF7", "LAG3", "ID2", "TNFAIP3", "DUSP2", "IER2",
        "GZMK", "CD226", "MKI67", "TNFRSF1B", "PI3KIP1", "TMA7", "FTH1"
    ))),
    return.seurat = FALSE,
    group.by = "source",
    assays = "RNA"
)

options(repr.plot.width = 9, repr.plot.height = 3.5)
pheatmap(
    t(avgexp$RNA),
    main = "",
    scale = "column",
    cluster_cols = TRUE,
    cluster_rows = FALSE,
    color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
    border_color = "white",
    width = 9,
    height = 3.3,
    fontsize = 9
)

In [None]:
avgexp = AverageExpression(kallion_treg, features = str_to_upper(c(
           "IL4R", "IL10RA" )), return.seurat = F, group.by = "source", assays = "RNA")

options(repr.plot.width = 9, repr.plot.height = 3.5)
pheatmap(t(avgexp$RNA), main = "", scale = "column", cluster_cols = T, cluster_rows = F,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 9)

In [None]:
cd4_l3_temra  <- readRDS("../data/processed//L3/cd4_l3_temra.rds")

In [None]:
cd4_l2  <- readRDS("../data/processed/L2//cd4_l2_subcluster.rds")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 4)
DefaultAssay(cd4_l2_subcluster)  <- "RNA"
FeaturePlot(cd4_l2_subcluster, features = "NCAM1", min.cutoff = 0, max.cutoff = 1)

In [None]:
VlnPlot(cd4_l2_subcluster, group.by = "annotations_manual", 
        features = "NCAM1") + 
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "black") 


In [None]:
DefaultAssay(cd4_l3_temra)  <- "integrated"
cd4_l3_temra  <- FindNeighbors(cd4_l3_temra)
cd4_l3_temra  <- FindClusters(cd4_l3_temra, resolution = 0.5)

In [None]:
DimPlot(cd4_l3_temra)

In [None]:
cd4_l3_temra$annotations_manual  <- cd4_l3_temra$seurat_clusters

In [None]:
df4  <- create_df4(cd4_l3_temra)

In [None]:
df4

In [None]:
df5  <- df4  %>% pivot_wider(names_from = "annotations_manual", values_from = "freq", names_prefix = "cl")  

options(repr.plot.width = 3, repr.plot.height = 5)
p1  <- df5 %>% 
filter(Condition %in% c("Ctrl T0", "Dia T0") & Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20"))  %>% 
  ggplot(aes(x = Condition, y = cl5)) + # you can change the x to whatever variable you're interested in
  geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(fun = "median",
               geom = "crossbar", 
               width = 0.75,
               color = "grey30") +
geom_beeswarm(size = 3, aes(fill = Condition), cex = 3, 
                shape = 21, color = "black", method = "center") +
scale_shape_manual(values = c(21,22))+
  ylab("") +
  xlab("") +
  theme_classic() +
scale_fill_manual(values = c("#1874cdff","#c41515ff"))+
ggpubr::stat_compare_means(label.x= 1.2, label.y.npc = 0.9,
                           size = 7, label = "p.format")+
ggtheme() +
 scale_y_continuous(limits = c(0,NA)) +
theme(strip.background = element_blank(), panel.grid = element_blank()) + 
  theme(axis.text = element_text(color = "black"),
        axis.text.x = element_text(angle = 90),
       axis.line = element_line(color = "black", size = 0.5),
        axis.ticks.x = element_blank()) + NoLegend() + ggtitle("Final data")
print(p1)
library(svglite)
#ggsave(filename = "treg_density_quantification.svg", width = 3, height = 5)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4)

DimPlot(cd4_l2_subcluster, cells.highlight = colnames(cd4_l2_subcluster)[grep(cd4_l2_subcluster$cdr3_A1, pattern = "CVVSDRGSTLGRLYF")])

# iNKT and CD3-56

In [None]:
cd4_l1_full_filt  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L1/cd4_l1_full_filt.rds")

In [None]:
library(GEOquery)

In [None]:
packageVersion('matrixStats')

In [None]:
gset <- getGEO("GSE106082", GSEMatrix =TRUE, getGPL=TRUE, AnnotGPL=TRUE)

In [None]:
gset <- readRDS("../data//geo//tr356.rds")

In [None]:
library(readr)

In [None]:
metadata <- data.frame(geo_id = gset$GSE106082_series_matrix.txt.gz$geo_accession,
                       cell_type = gset$GSE106082_series_matrix.txt.gz$`characteristics_ch1.1`
                       )

In [None]:
mtx_tr356  <- read_csv("../../240218_VN_Diabetes_V05/data/published_data/Terrazzano_2020/tr356_df_sum.csv")

In [None]:
mtx_tr356$`...1`  <- NULL

In [None]:
mtx_tr356  <- mtx_tr356  %>% column_to_rownames("SYMBOL")

In [None]:
mtx_tr356

In [None]:
plan("multisession")

In [None]:
library(tidyverse)

In [None]:
mtx_tr356  <- mtx_tr356  %>% mutate_all(.funs = as.numeric)

In [None]:
mtx_tr356

In [None]:
ref_tr3_56 <- list(matrix = mtx_tr356, 
                       labels = metadata$cell_type)


In [None]:
ref_tr3_56$matrix  <- as.matrix(ref_tr3_56$matrix)

In [None]:
library(SingleR)

In [None]:
# Label every cell against the TR3-56 reference with SingleR.
# test:       count matrix taken from the RNA assay (`@counts` slot)
# ref/labels: expression matrix and per-sample labels stored in ref_tr3_56
# Fine-tuning is enabled and 4 threads are requested.
# NOTE(review): assumes the test and reference matrices share gene identifiers
# (the reference is row-named by SYMBOL) -- TODO confirm.
pred <- SingleR(test = cd4_l1_full_filt@assays$RNA@counts, 
                ref=ref_tr3_56$matrix, labels=ref_tr3_56$labels, 
                fine.tune = T,num.threads = 4
       )

In [None]:
pred

In [None]:
cd4_l1_full_filt$singler  <- pred$labels

In [None]:
DimPlot(cd4_l1_full_filt, group.by = "annotations_l2")

In [None]:
DimPlot(cd4_l1_full_filt, group.by = "singler", shuffle = T, cols = c("purple","grey88","dodgerblue","red"))

In [None]:
options(repr.plot.width = 6.5, repr.plot.height = 4.5)
DimPlot(cd4_l1_full_filt, group.by = "annot2",
       cols = c("#d4a323ff", "#f1c07dff", "#7d262bff", "#d04f4fff"))

In [None]:
DimPlot(cd4_l1_full_filt, group.by = "annot2",
       cols = c("grey78", "grey78", "#7d262bff", "grey78"))

### Score per patient

In [None]:
# Per-cell SingleR score for the TR3-56 reference label.
# The column is selected by label name rather than by position: elsewhere in
# this notebook the same metadata field is filled from a different column index
# of pred$scores, so a positional index is ambiguous. The label string is the
# one used to highlight TR3-56 cells in the `singler` DimPlot.
# NOTE(review): confirm "cell subset: TR3-56" is the exact column name of
# pred$scores; a mismatch fails loudly with "subscript out of bounds".
cd4_l1_full_filt$score_tr356  <- pred$scores[, "cell subset: TR3-56"]

In [None]:
options(repr.plot.width = 6, repr.plot.height = 8)

data.frame(Score = cd4_l1_full_filt$score_tr356,
                  Annotation = gsub(cd4_l1_full_filt$annotations_l2, pattern = "---", replacement = ""),
          Patient_ID = cd4_l1_full_filt$Patient_ID,
          Patient_Time = cd4_l1_full_filt$Patient_Time,
          Disease = cd4_l1_full_filt$Disease,
           Condition = cd4_l1_full_filt$Condition
          )  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
ggplot(aes(x = Condition, y = Score)) +
  geom_violin() + 
stat_compare_means(comparisons = list(c(1,2),c(2,3),c(1,3))) +
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score") + theme_classic() + ggtheme() + xlab("")

In [None]:

data.frame(Score = cd4_l1_full_filt$score_tr356,
                  Annotation = gsub(cd4_l1_full_filt$annotations_l2, pattern = "---", replacement = ""),
          Patient_ID = cd4_l1_full_filt$Patient_ID,
          Patient_Time = cd4_l1_full_filt$Patient_Time,
          Disease = cd4_l1_full_filt$Disease, 
           Condition = cd4_l1_full_filt$Condition
          )  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
group_by(Condition, Patient_Time)  %>% 
summarize(Score = mean(Score))  %>% 
ggplot(aes(x = Condition, y = Score)) +
  geom_violin() + 
geom_point()+
stat_compare_means(comparisons = list(c(1,2),c(2,3))) +
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score All cd4 cells") + theme_classic() + ggtheme() + xlab("")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

data.frame(Score = cd4_l1_full_filt$score_tr356,
                  Annotation = gsub(cd4_l1_full_filt$annotations_l2, pattern = "---", replacement = ""),
          Patient_ID = cd4_l1_full_filt$Patient_ID,
          Patient_Time = cd4_l1_full_filt$Patient_Time,
          Disease = cd4_l1_full_filt$Disease, 
           Condition = cd4_l1_full_filt$Condition
          )  %>% 
dplyr::filter(Patient_ID != 116)  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
group_by(Condition, Patient_Time)  %>% 
summarize(Score = mean(Score))  %>% 
ggplot(aes(x = Condition, y = Score)) +
   geom_violin(alpha = 0.3, aes(fill = Condition)) +
  stat_summary(fun = "median",
               geom = "crossbar", 
               width = 0.75,
               color = "grey30") +
   scale_fill_manual(values = c("#1874cdff",  "#ee6363ff", "#c41515ff","#eeb4b4ff")) +
scale_color_manual(values = c("dodgerblue3",  "#e54c4cff", "#aa2a2aff","#e88989ff")) +
geom_beeswarm(size = 3, cex = 3, 
              color = "black", method = "center",
             aes(fill = Condition), shape = 21) +
stat_compare_means(comparisons = list(c(2,3)), paired = T) +
   ggtitle("TR3-56 score All cd4 cells") + theme_classic() + ggtheme() + xlab("")

ggsave("../figures/subset_characterization/tr3_56_score_in_condition_cd4.svg",
       width = 10, height = 9.5, units = "cm")

In [None]:
# Violin plot of the TR3-56 score per simplified annotation (annot2), ordered
# with fct_reorder() on Score, mean shown as a red crossbar; saved as SVG.
# NOTE(review): the patterns stripped here are lowercase ("cd4 T cells", ...)
# and the collapsed subsets are Tem/Tcm/Temra/Proliferating, whereas other
# cells of this notebook strip "CD4 T cells" / "CD4 Unconventional T cells"
# from the same annotations_l2 column and collapse Treg/Tfh/... instead.
# gsub() is case-sensitive, so these patterns presumably do not match --
# confirm which variant is intended for this figure.
data.frame(Score = cd4_l1_full_filt$score_tr356,
                  Annotation = gsub(cd4_l1_full_filt$annotations_l2, pattern = "---", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 Unconventional T cells", replacement = ""))  %>% 
mutate(Annotation = gsub(Annotation, pattern = "cd4 NK cells", replacement = ""))  %>% 
mutate(annot2 = case_when(Annotation %in% c("Tem","Tcm","Temra","Proliferating") ~ "NonNaive", 
                          TRUE ~ Annotation))  %>% 
ggplot(aes(x = fct_reorder(annot2, Score), y = Score)) +
  geom_violin() + 
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score") + theme_classic() + ggtheme() + xlab("")
ggsave("../figures/cd356_score.svg", width = 4, height = 3.5)

In [None]:
options(repr.plot.width = 6.5, repr.plot.height = 4.5)
DimPlot(cd4_l1_full_filt, group.by = "singler", order = "cell subset: TR3-56",
        shuffle = F, cols = c("grey88","grey88","grey88","red"))

In [None]:
DimPlot(cd4_l1_full_filt, group.by = "singler", shuffle = T, cols = c("purple","grey88","dodgerblue","red"))

In [None]:
test  <- data.frame(annotation = cd4_l1_full_filt$annotations_l2,
                    pred = cd4_l1_full_filt$singler)

In [None]:
# Cross-tabulate manual annotation against the SingleR label: n is the number
# of cells per (annotation, pred) pair and freq is that count as a fraction of
# all cells sharing the same annotation. The result stays grouped by annotation.
df2 <- test %>%
  count(annotation, pred) %>%
  group_by(annotation) %>%
  mutate(freq = n / sum(n))

In [None]:
df2

In [None]:
df2

In [None]:
# Per-cell SingleR score for the TR3-56 reference label.
# The column is selected by label name rather than by position: an earlier cell
# fills this same metadata field from a different column index of pred$scores,
# so a positional index is ambiguous. The label string is the one used to
# highlight TR3-56 cells in the `singler` DimPlot.
# NOTE(review): confirm "cell subset: TR3-56" is the exact column name of
# pred$scores; a mismatch fails loudly with "subscript out of bounds".
cd4_l1_full_filt$score_tr356  <- pred$scores[, "cell subset: TR3-56"]

In [None]:
VlnPlot(cd4_l1_full_filt, features = "score_tr356", group.by = "annotations_l2")

In [None]:
options(repr.plot.width = 14, repr.plot.height = 6)
# Violin plot of the TR3-56 score per full annotation label ("---" replaced by
# a line break), ordered with fct_reorder() on Score, mean as a red crossbar.
# theme_classic() is now added before ggtheme(), as in the other score plots of
# this notebook: a complete theme added last replaces the settings contributed
# by whatever was added before it, so the previous order discarded ggtheme().
# NOTE(review): assumes ggtheme() (defined outside this chunk) contributes
# theme settings meant to sit on top of theme_classic() -- confirm.
data.frame(Score = cd4_l1_full_filt$score_tr356,
                  Annotation = gsub(cd4_l1_full_filt$annotations_l2, pattern = "---", replacement = "\n"))  %>% 
#mutate(Annotation = gsub(Annotation, pattern = "cd4 T cells", replacement = ""))  %>% 
#mutate(Annotation = gsub(Annotation, pattern = "cd4 Unconventional T cells", replacement = ""))  %>% 
#mutate(Annotation = gsub(Annotation, pattern = "cd4 NK cells", replacement = ""))  %>% 
ggplot(aes(x = fct_reorder(Annotation, Score), y = Score)) +
  geom_violin() + 
  stat_summary(fun = "mean",
               geom = "crossbar", 
               width = 0.5,
               colour = "red") + ggtitle("TR3-56 score") + 
theme_classic() + ggtheme()  + theme(axis.ticks.x = element_blank()) + xlab("")

In [None]:
# Violin plot of `score_tr356` with the listed helper/regulatory/proliferating
# annotations pooled into a single "NonNaive" group (`annot2`). The label
# prefixes "---", "CD4 T cells" and "CD4 Unconventional T cells" are stripped
# first so that the short names match the case_when() list.
data.frame(
  Score = cd4_l1_full_filt$score_tr356,
  Annotation = gsub(
    pattern = "---",
    replacement = "",
    x = cd4_l1_full_filt$annotations_l2
  )
) %>%
  mutate(
    Annotation = gsub(pattern = "CD4 T cells", replacement = "", x = Annotation),
    Annotation = gsub(
      pattern = "CD4 Unconventional T cells",
      replacement = "",
      x = Annotation
    ),
    annot2 = case_when(
      Annotation %in% c(
        "Treg", "Tfh", "ISAGhi", "Th2", "Nfkb", "Th1Th17", "Proliferating"
      ) ~ "NonNaive",
      TRUE ~ Annotation
    )
  ) %>%
  ggplot(aes(x = fct_reorder(annot2, Score), y = Score)) +
  geom_violin() +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score") +
  theme_classic() +
  ggtheme() +
  xlab("")
# Saves the plot built just above (ggsave() defaults to the last plot).
ggsave("../figures/cd356_score_cd4.svg", width = 3, height = 3.5)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# Switch to the RNA assay and plot NCAM1 with the colour scale limited to
# the range 0-2.
DefaultAssay(cd4_l1_full_filt) <- "RNA"
FeaturePlot(
  cd4_l1_full_filt,
  features = "NCAM1",
  min.cutoff = 0,
  max.cutoff = 2
)

In [None]:
# Add two metadata columns to the Seurat object:
#   Annotation - `annotations_l2` with "---", "CD4 T cells" and
#                "CD4 Unconventional T cells" removed
#   annot2     - Annotation, with the listed subsets pooled as "NonNaive"
cd4_l1_full_filt@meta.data <- cd4_l1_full_filt@meta.data %>%
  mutate(
    Annotation = gsub(
      pattern = "---",
      replacement = "",
      x = cd4_l1_full_filt$annotations_l2
    ),
    Annotation = gsub(pattern = "CD4 T cells", replacement = "", x = Annotation),
    Annotation = gsub(
      pattern = "CD4 Unconventional T cells",
      replacement = "",
      x = Annotation
    ),
    annot2 = case_when(
      Annotation %in% c(
        "Treg", "Tfh", "ISAGhi", "Th2", "Nfkb", "Th1Th17", "Proliferating"
      ) ~ "NonNaive",
      TRUE ~ Annotation
    )
  )

In [None]:
# One label per (annot2, Sample_ID) combination, separated by a space.
cd4_l1_full_filt$annotations_l2_sample <- paste(
  cd4_l1_full_filt$annot2,
  cd4_l1_full_filt$Sample_ID
)

In [None]:
# Show the number of cells per Patient_Time.
# The summary is only displayed. It must not be assigned back to
# `cd4_l1_full_filt@meta.data`: that would replace the per-cell metadata
# (including `annotations_l2_sample`, which the following cells read) with a
# table that has one row per Patient_Time.
cd4_l1_full_filt@meta.data %>%
  group_by(Patient_Time) %>%
  tally()

In [None]:
# Extract the dataframe with seurat metadata
seurat_meta_data <- cd4_l1_full_filt@meta.data
seurat_meta_data$sample <- seurat_meta_data$annotations_l2_sample

# Select genes of interest
gene_hits <- c("NCAM1", "TRGC1", "CD8A", "CD8B",
               "FCGR3A", "CD3D", "CD3G", "CD3Z", "ZBTB16",
               "CD4", "GZMB")

# By default the percentage of calculated cells is computed for whole sample
# Optionally, you can select a cluster that you're interested in by subsetting:
# pbmc_small <- subset(pbmc_small, seurat_clusters == 3)


# Result table: one row per gene; the loop below adds one column per sample
# holding the fraction of cells with a non-zero count for that gene.
expr_data4 <- data.frame(genes = gene_hits)

# Converts any count above zero to 1 and everything else to 0
fns_replace <- function(x) {
  ifelse(x > 0, 1, 0)
}

for (j in unique(seurat_meta_data$sample)) {

  # subset only the cells of the current annotation/sample combination
  seu_sub_sample <- subset(cd4_l1_full_filt, annotations_l2_sample == j)

  # select the rows corresponding to genes of interest
  counts_all <- seu_sub_sample@assays$RNA@counts
  index_subset <- which(rownames(counts_all) %in% gene_hits)

  # genes x cells matrix of raw counts; drop = FALSE keeps the matrix shape
  # even when only one gene or one cell is left
  expr_data <- as.matrix(counts_all[index_subset, , drop = FALSE])

  # convert expression to binary values (dimnames are kept)
  expr_data2 <- fns_replace(expr_data)

  # fraction of cells expressing each gene
  expr_data3 <- rowMeans(expr_data2)

  # add zeroes for genes that are not present in the count matrix
  for (k in setdiff(gene_hits, names(expr_data3))) {
    expr_data3[[k]] <- 0
  }

  expr_data3 <- as.data.frame(expr_data3)
  colnames(expr_data3) <- j
  expr_data3$genes <- rownames(expr_data3)

  # append the column for this sample, keeping the gene order of `gene_hits`
  expr_data4 <- left_join(expr_data4, expr_data3, by = "genes")

}

In [None]:
expr_data4

In [None]:
# Now we have the whole dataframe, we just need to tidy it a bit, add metadata and visualize

# Transpose so that samples become rows and genes become columns. t() goes
# through a character matrix because of the `genes` column, so every value
# is a string from here on.
expr_data5 <- as.data.frame(t(expr_data4))

# The first row holds the gene names: use it as the header, then drop it.
# A negative index is used because `2:nrow(x)` would count backwards
# (rows 2 and 1) if the table had no sample rows.
colnames(expr_data5) <- as.character(unlist(expr_data5[1, ]))
expr_data5 <- expr_data5[-1, , drop = FALSE]
expr_data5$sample <- rownames(expr_data5)
expr_data5 <- expr_data5 %>%
  pivot_longer(!sample, names_to = "gene", values_to = "pct_express")

# Add metadata per sample - select those that you will use in the plot below
md_to_join <- seurat_meta_data %>%
  dplyr::select(sample = annotations_l2_sample) %>%
  unique() %>%
  mutate(sample = as.character(sample))
md2 <- left_join(expr_data5, md_to_join, by = "sample") %>%
  ungroup() %>%
  unique()

In [None]:
md2

# Preview of how `sample` is split into its two parts (only displayed).
# NOTE(review): Sample_ID takes the last 4 characters while population drops
# only the last 3, so the two overlap by one character - presumably the
# separating space of a 3-character Sample_ID; confirm.
md2 %>%
  mutate(
    Sample_ID = substr(sample, nchar(sample) - 3, nchar(sample)),
    population = substr(sample, 1, nchar(sample) - 3)
  )

options(repr.plot.width = 20, repr.plot.height = 12)

# Fraction of expressing cells per population, one facet per gene.
md2 %>%
  dplyr::filter(gene != "CD3Z") %>%
  # widen and lengthen again so that every sample/gene pair has a row
  # ("0" where a pair was missing)
  pivot_wider(names_from = "gene", values_from = "pct_express", values_fill = "0") %>%
  # pivot all gene columns by name instead of a fixed column range, so the
  # step stays correct when genes are added or filtered
  pivot_longer(cols = -sample, names_to = "gene", values_to = "pct_express") %>%
  mutate(
    Sample_ID = substr(sample, nchar(sample) - 3, nchar(sample)),
    population = substr(sample, 1, nchar(sample) - 3)
  ) %>%
  # pct_express is stored as text, hence as.numeric()
  ggplot(aes(x = population, y = as.numeric(pct_express))) +
  geom_violin(alpha = 0.3, aes(fill = population), scale = "width") +
  geom_beeswarm(size = 1.5, aes(fill = population), cex = 2,
                shape = 21, color = "grey40", corral = "random") +
  stat_summary(fun = "median",
               geom = "crossbar",
               width = 0.75,
               color = "grey30") +
  #stat_compare_means(label = "p.format", comparisons = list(c(1,2))) +
  theme_minimal() +
  facet_wrap(~gene, scales = "free", ncol = 5) +
  ylim(c(0, NA)) +
  #ggpubr::stat_compare_means()+
  ylab("Percentage of expressing cells") +
  theme_classic() + ggtheme() +
  theme(legend.title = element_blank(), axis.text.x = element_text(angle = 90))

#ggsave("../figures/scRNAseq_gd_characterization.svg", width = 13, height = 9)

In [None]:
options(repr.plot.width = 15, repr.plot.height = 8)

# Box plot of the fraction of expressing cells per population, one facet per
# gene, with an overall comparison test per facet.
md2 %>%
  pivot_wider(names_from = "gene", values_from = "pct_express", values_fill = "0") %>%
  # Pivot every gene column back by name. No gene is filtered out in this
  # cell and `gene_hits` holds eleven genes, so a fixed 2:11 range would
  # leave the last gene column unpivoted and missing from the facets.
  pivot_longer(cols = -sample, names_to = "gene", values_to = "pct_express") %>%
  mutate(
    Sample_ID = substr(sample, nchar(sample) - 3, nchar(sample)),
    population = substr(sample, 1, nchar(sample) - 3)
  ) %>%
  # pct_express is stored as text, hence as.numeric()
  ggplot(aes(x = population, y = as.numeric(pct_express))) +
  geom_boxplot(outlier.shape = NA) +
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  geom_jitter(position = position_jitter(0.2), size = 2, aes(color = factor(population))) +
  theme_minimal() +
  facet_wrap(~gene, scales = "free", ncol = 5) +
  ylim(c(0, NA)) +
  ggpubr::stat_compare_means() +
  ylab("Percentage of expressing cells") +
  theme(legend.title = element_blank())

In [None]:
options(repr.plot.width = 20, repr.plot.height = 12)

# Fraction of expressing cells per population (CD3Z and CD8A left out), one
# facet per gene; saved as the CD4 Temra characterization figure.
md2 %>%
  dplyr::filter(gene != "CD3Z" & gene != "CD8A") %>%
  # widen and lengthen again so that every sample/gene pair has a row
  # ("0" where a pair was missing)
  pivot_wider(names_from = "gene", values_from = "pct_express", values_fill = "0") %>%
  # pivot all gene columns by name instead of a fixed column range, so the
  # step stays correct when the gene filter above changes
  pivot_longer(cols = -sample, names_to = "gene", values_to = "pct_express") %>%
  mutate(
    Sample_ID = substr(sample, nchar(sample) - 3, nchar(sample)),
    population = substr(sample, 1, nchar(sample) - 3)
  ) %>%
  # pct_express is stored as text, hence as.numeric()
  ggplot(aes(x = population, y = as.numeric(pct_express))) +
  geom_violin(alpha = 0.4, aes(fill = population), scale = "width") +
  scale_fill_manual(values = c("#d4a323ff", "#f1c07dff", "#800000b4", "#dd665bff")) +
  #geom_beeswarm(size = 1.5, aes(fill = population), cex = 2,
  #                shape = 21, color = "grey40",corral = "random") +
  stat_summary(fun = "median",
               geom = "crossbar",
               width = 0.75,
               color = "grey30") +
  #stat_compare_means(label = "p.format", comparisons = list(c(1,2))) +
  theme_minimal() +
  facet_wrap(~gene, scales = "free", ncol = 5) +
  ylim(c(0, NA)) +
  #ggpubr::stat_compare_means()+
  ylab("Percentage of expressing cells") +
  theme_classic() + ggtheme() +
  theme(legend.title = element_blank(), axis.text.x = element_text(angle = 90))

# Saves the plot built just above (ggsave() defaults to the last plot).
ggsave("../figures/scRNAseq_CD4Temra_characterization.svg", width = 13, height = 9)

In [None]:
test

In [None]:
# Proportional stacked bar per annotation, filled by `pred`; three fill
# levels are white and one is red2.
ggplot(test, aes(x = annotation, fill = factor(pred))) +
  geom_bar(position = "fill") +
  theme_classic() +
  ggtheme() +
  scale_fill_manual(values = c("white", "white", "white", "red2")) +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 90)
  )

In [None]:
options(repr.plot.width = 7, repr.plot.height = 23)

# Same label clean-up and "NonNaive" pooling as for the Seurat metadata,
# applied to the annotation/pred table.
test2 <- test %>%
  mutate(
    Annotation = gsub(pattern = "---", replacement = "", x = annotation),
    Annotation = gsub(pattern = "CD4 T cells", replacement = "", x = Annotation),
    Annotation = gsub(
      pattern = "CD4 Unconventional T cells",
      replacement = "",
      x = Annotation
    ),
    annot2 = case_when(
      Annotation %in% c(
        "Treg", "Tfh", "ISAGhi", "Th2", "Nfkb", "Th1Th17", "Proliferating"
      ) ~ "NonNaive",
      TRUE ~ Annotation
    )
  )

In [None]:
# Distinct annotation / pred / annot2 combinations.
test2 %>% unique()

In [None]:
options(repr.plot.width = 7, repr.plot.height = 30)

# Proportional stacked bar per pooled annotation (`annot2`), filled by
# `pred`, drawn very tall with 40 axis breaks, and saved to disk.
ggplot(test2, aes(x = annot2, fill = factor(pred))) +
  geom_bar(position = "fill") +
  theme_classic() +
  ggtheme() +
  scale_fill_manual(values = c("white", "white", "white", "red2")) +
  scale_y_continuous(n.breaks = 40, expand = c(0, NA)) +
  theme(
    legend.title = element_blank(),
    axis.text.x = element_text(angle = 90)
  )

# Saves the plot built just above (ggsave() defaults to the last plot).
ggsave("../figures/cd4_temra_pct.svg", width = 15, height = 45, units = "cm")

## Temra subclustering

In [None]:
# NOTE(review): this reloads `cd4_l3_temra` from the 240218_VN_Diabetes_V05
# project folder, replacing the object read from ../data/processed/L3/ at the
# top of the notebook - confirm this is the intended file.
cd4_l3_temra <- readRDS(
  "../../240218_VN_Diabetes_V05/data/processed/L3/cd4_l3_temra.rds"
)

In [None]:
### Temra
options(repr.plot.width = 6, repr.plot.height = 5)

DimPlot(cd4_l3_temra, label = TRUE)

In [None]:
# Marker genes for every identity class, with FindAllMarkers() defaults.
mrk <- FindAllMarkers(cd4_l3_temra)

In [None]:
options(repr.plot.width = 12, repr.plot.height = 6)

# Two panels of marker genes on the Temra embedding, three plots per row.
FeaturePlot(
  cd4_l3_temra,
  features = c("CD4", "TRGV2", "TRDC", "CD8A", "NCAM1", "GZMB"),
  ncol = 3
)

In [None]:
FeaturePlot(
  cd4_l3_temra,
  features = c("CD4", "TRGV2", "NCR1", "MKI67", "CD3D"),
  ncol = 3
)

In [None]:
# Temra embedding coloured by `annotations_manual`, first without and then
# with the legend; both use the same five-colour palette.
options(repr.plot.width = 4, repr.plot.height = 3.5)
DimPlot(
  cd4_l3_temra,
  group.by = "annotations_manual",
  cols = c("#af3d3db4", "#640000ff", "#b124cdff", "#9ad5ffff", "#ebc6d3ff"),
  label = FALSE,
  shuffle = TRUE
) +
  NoLegend()

In [None]:
options(repr.plot.width = 6, repr.plot.height = 3.5)

DimPlot(
  cd4_l3_temra,
  group.by = "annotations_manual",
  cols = c("#af3d3db4", "#640000ff", "#b124cdff", "#9ad5ffff", "#ebc6d3ff"),
  label = FALSE,
  shuffle = TRUE
)

In [None]:
# `create_df4()` is defined outside this section.
# NOTE(review): presumably it returns per-sample frequencies (`freq`) of each
# `annotations_manual` group - confirm against its definition.
df4 <- create_df4(cd4_l3_temra)

In [None]:
# freq * 100 per Condition for experiments 16/18/19/20, one facet per
# `annotations_manual` group; conditions 1 and 2 on the x axis are compared.
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  # x can be swapped for any other variable of interest
  ggplot(aes(x = Condition, y = freq * 100)) +
  geom_violin(aes(fill = Condition), alpha = 0.3) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(
    values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")
  ) +
  scale_color_manual(
    values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")
  ) +
  geom_beeswarm(
    aes(fill = Condition),
    size = 3,
    cex = 3,
    color = "black",
    method = "center",
    shape = 21
  ) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(
    label.x = 1.5,
    size = 7,
    vjust = -1,
    label = "p.format",
    comparisons = list(c(1, 2))
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct annotation groups (facet_wrap above uses a fixed ncol).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 15, repr.plot.height = 7)
p5

In [None]:
options(repr.plot.width = 15, repr.plot.height = 7)

# Same frequency plot as in the previous cell, but comparing conditions 2
# and 3 on the x axis; shown and then written to disk.
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  # x can be swapped for any other variable of interest
  ggplot(aes(x = Condition, y = freq * 100)) +
  geom_violin(aes(fill = Condition), alpha = 0.3) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(
    values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")
  ) +
  scale_color_manual(
    values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")
  ) +
  geom_beeswarm(
    aes(fill = Condition),
    size = 3,
    cex = 3,
    color = "black",
    method = "center",
    shape = 21
  ) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(
    label.x = 1.5,
    size = 7,
    vjust = -1,
    label = "p.format",
    comparisons = list(c(2, 3))
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 10)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct annotation groups (facet_wrap above uses a fixed ncol).
ncols <- length(levels(factor(df4$annotations_manual)))
p5

ggsave(
  "../figures/subset_characterization/temra_subsets_in_condition.svg",
  width = 30,
  height = 12,
  units = "cm"
)

## Temra without NK

In [None]:
cd4_l3_temra

In [None]:
# Keep clusters 0, 1, 2 and 4 only.
# NOTE(review): presumably the removed cluster(s) are the NK-like cells named
# in the section title - confirm.
cd4_l3_temra <- subset(cd4_l3_temra, seurat_clusters %in% c(0, 1, 2, 4))

In [None]:
# The embedding and clustering below run with "integrated" as default assay.
DefaultAssay(cd4_l3_temra) <- "integrated"


In [None]:
cd4_l3_temra <- RunUMAP(cd4_l3_temra, dims = 1:12)


In [None]:
cd4_l3_temra <- FindNeighbors(cd4_l3_temra, dims = 1:12, reduction = "pca")

In [None]:
cd4_l3_temra <- FindClusters(cd4_l3_temra, resolution = 0.5)

DimPlot(cd4_l3_temra, label = TRUE)

In [None]:
# Markers per cluster of the re-clustered Temra object.
# NOTE(review): log(2) is the natural logarithm (about 0.69), while the
# result column used below is `avg_log2FC` - confirm which scale the
# threshold was meant to be on.
mrk <- FindAllMarkers(cd4_l3_temra, logfc.threshold = log(2))

In [None]:
# Markers of cluster 3 with avg_log2FC above 1.
mrk %>%
  dplyr::filter(cluster == 3, avg_log2FC > 1)

In [None]:
# First, broader marker panel: average RNA expression per cluster, shown as
# a table.
markers <- c(
  "IL7R", "TRDC", "GZMK", "GZMB", "CD244", "TRGV9", "LTB", "KLRB1", "CXCR6",
  "CX3CR1", "ZEB2", "GNLY", "ANXA1", "ITGB1", "CD27", "TCF7", "SELL", "XCL2",
  "NELL2", "CD160", "TIGIT", "CTLA4"
)

In [None]:
avgexp <- AverageExpression(
  cd4_l3_temra,
  assay = "RNA",
  features = markers,
  group.by = "seurat_clusters",
  return.seurat = FALSE
)

avgexp$RNA

In [None]:
# Reduced, re-ordered panel that the heatmaps below are drawn from.
markers <- c(
  "IL7R", "SELL", "KLRB1", "CX3CR1", "ANXA1", "ITGB1", "GZMB", "GNLY", "ZEB2",
  "LTB", "CXCR6", "GZMK", "NELL2", "TCF7", "CD27", "XCL2", "CD160"
)
avgexp <- AverageExpression(
  cd4_l3_temra,
  assay = "RNA",
  features = markers,
  group.by = "seurat_clusters",
  return.seurat = FALSE
)


In [None]:
options(repr.plot.width = 14, repr.plot.height = 3.7)

# Clusters as rows, marker genes as columns; each gene (column) is scaled
# and only the rows are clustered. This first call draws in the notebook.
pheatmap(
  t(avgexp$RNA),
  main = "",
  scale = "column",
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 9,
  #  filename = "../figures/subset_characterization/cd4_temra_heatmap.pdf",
  width = 8,
  height = 3
)

In [None]:
options(repr.plot.width = 14, repr.plot.height = 3.7)

# Same heatmap, written to a PDF file instead.
pheatmap(
  t(avgexp$RNA),
  main = "",
  scale = "column",
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 9,
  filename = "../figures/subset_characterization/cd4_temra_heatmap.pdf",
  width = 8,
  height = 3
)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# Rasterised embedding of the re-clustered Temra cells with a fixed
# four-colour palette (the stray empty argument of the call is left out;
# the default is used either way).
DimPlot(
  cd4_l3_temra,
  cols = c("#c86f6fff", "#640000ff", "#e29bb3ff", "#f4d7d7ff"),
  pt.size = 7,
  raster = TRUE,
  raster.dpi = c(600, 600)
)
#ggsave("../figures/subset_characterization/DimPlot_temra_subsets.svg",
#      width = 10, height = 7.5, units = "cm")

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# One feature plot per cell, each with the colour scale starting at 0.
FeaturePlot(cd4_l3_temra, features = "TIGIT", min.cutoff = 0)

In [None]:
FeaturePlot(cd4_l3_temra, features = "GZMK", min.cutoff = 0)

In [None]:
FeaturePlot(cd4_l3_temra, features = "ZBTB16", min.cutoff = 0)

In [None]:
FeaturePlot(cd4_l3_temra, features = "TRGV9", min.cutoff = 0)

In [None]:
FeaturePlot(cd4_l3_temra, features = "CD4", min.cutoff = 0)

In [None]:
FeaturePlot(cd4_l3_temra, features = "CD8B", min.cutoff = 0)

In [None]:
# Same gene as the previous cell.
FeaturePlot(cd4_l3_temra, features = "CD8B", min.cutoff = 0)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

# Highlight the cells whose `v_gene_A1` is TRAV10 and `j_gene_A1` is TRAJ18.
# which() keeps only positions where the condition is TRUE: a cell with a
# missing gene call gives NA in the comparison, and indexing with NA would
# put NA entries into the list of cell names.
DimPlot(
  cd4_l3_temra,
  cells.highlight = colnames(cd4_l3_temra)[
    which(
      cd4_l3_temra$v_gene_A1 == "TRAV10" &
        cd4_l3_temra$j_gene_A1 == "TRAJ18"
    )
  ]
) +
  NoLegend()


In [None]:
# Use the new cluster ids as `annotations_manual`, then rebuild the
# frequency table with `create_df4()` (defined outside this section).
cd4_l3_temra$annotations_manual <- cd4_l3_temra$seurat_clusters

In [None]:
df4 <- create_df4(cd4_l3_temra)

In [None]:
# freq * 100 per Condition for experiments 16/18/19/20, one facet per
# cluster of the re-clustered Temra object; conditions 2 and 3 on the x axis
# are compared.
p5 <- df4 %>%
  filter(Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")) %>%
  # x can be swapped for any other variable of interest
  ggplot(aes(x = Condition, y = freq * 100)) +
  geom_violin(aes(fill = Condition), alpha = 0.3) +
  stat_summary(
    fun = "median",
    geom = "crossbar",
    width = 0.75,
    color = "grey30"
  ) +
  scale_fill_manual(
    values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")
  ) +
  scale_color_manual(
    values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")
  ) +
  geom_beeswarm(
    aes(fill = Condition),
    size = 3,
    cex = 3,
    color = "black",
    method = "center",
    shape = 21
  ) +
  facet_wrap(~annotations_manual, scales = "free", ncol = 9) +
  ylab("Frequency") +
  xlab("Condition") +
  theme_classic() +
  ggtheme() +
  ggpubr::stat_compare_means(
    label.x = 1.5,
    size = 7,
    vjust = -1,
    label = "p.format",
    comparisons = list(c(2, 3))
  ) +
  ggtheme() +
  scale_y_continuous(limits = c(0, NA), expand = c(0.05, 0, 0, 15)) +
  theme(strip.background = element_blank(), panel.grid = element_blank()) +
  theme(
    axis.text = element_text(color = "black"),
    axis.text.x = element_text(angle = 90),
    axis.line = element_line(color = "black", size = 0.5),
    axis.ticks.x = element_blank()
  ) +
  NoLegend() +
  ggtitle("Final data all groups")
# Number of distinct annotation groups (facet_wrap above uses a fixed ncol).
ncols <- length(levels(factor(df4$annotations_manual)))

In [None]:
options(repr.plot.width = 14, repr.plot.height = 6)
p5

In [None]:
# NOTE(review): this writes to the same file name as the ggsave() call in the
# "Temra subclustering" section, so that earlier figure is overwritten -
# confirm this is intended.
ggsave(
  "../figures/subset_characterization/temra_subsets_in_condition.svg",
  width = 30,
  height = 12,
  units = "cm"
)

### TR3-56 score in Temra

In [None]:
# How many Temra cell names are (TRUE) or are not (FALSE) present in the
# full CD4 object.
(colnames(cd4_l3_temra) %in% colnames(cd4_l1_full_filt)) %>%
  table()

In [None]:
DimPlot(cd4_l3_temra)

In [None]:
# Copy `score_tr356` and `singler` from the full CD4 object onto the Temra
# cells by joining the two metadata tables.
md_temra <- cd4_l3_temra@meta.data

In [None]:
md_full <- cd4_l1_full_filt@meta.data %>%
  dplyr::select(barcode, score_tr356, singler)

In [None]:
# No `by` is given, so the join uses every column name the two tables share.
md_joint <- md_temra %>%
  left_join(md_full)

In [None]:
# Put the cell names back as row names.
# NOTE(review): this assumes the join returned exactly one row per Temra
# cell, in the original order - confirm that `barcode` is unique in md_full.
rownames(md_joint) <- rownames(cd4_l3_temra@meta.data)

In [None]:
cd4_l3_temra@meta.data <- md_joint

In [None]:
# Temra embedding coloured by the joined `singler` label (three colours).
DimPlot(
  cd4_l3_temra,
  cols = c("grey88", "blue", "red"),
  group.by = "singler"
)

In [None]:
# Default identities with large cluster labels.
DimPlot(cd4_l3_temra, label = TRUE, label.size = 10)

In [None]:
# Pair each Temra cell's cluster with its `singler` label.
test <- data.frame(
  annotation = cd4_l3_temra$seurat_clusters,
  pred = cd4_l3_temra$singler
)

# Counts per (cluster, pred) pair; the table stays grouped by cluster, so
# `freq` is the share of each label within one cluster.
df2 <- test %>%
  group_by(annotation, pred) %>%
  summarise(n = n()) %>%
  mutate(freq = n / sum(n))

df2

options(repr.plot.width = 8, repr.plot.height = 4.5)

# Horizontal proportional bar per cluster, filled by `pred`.
ggplot(test, aes(x = annotation, fill = factor(pred))) +
  geom_bar(position = "fill") +
  theme_classic() +
  ggtheme() +
  coord_flip() +
  scale_fill_manual(values = c("grey88", "lightskyblue1", "red2"))


In [None]:
options(repr.plot.width = 5, repr.plot.height = 6)

# Mean score per (Cluster, Patient_Time), shown per cluster; groups 1 vs 2
# and 2 vs 3 on the x axis are compared.
data.frame(
  Score = cd4_l3_temra$score_tr356,
  Patient_ID = cd4_l3_temra$Patient_ID,
  Patient_Time = cd4_l3_temra$Patient_Time,
  Disease = cd4_l3_temra$Disease,
  Cluster = cd4_l3_temra$seurat_clusters,
  Condition = cd4_l3_temra$Condition
) %>%
  group_by(Cluster, Patient_Time) %>%
  summarize(Score = mean(Score)) %>%
  ggplot(aes(x = Cluster, y = Score)) +
  geom_violin() +
  geom_point() +
  stat_compare_means(comparisons = list(c(1, 2), c(2, 3))) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score cd4 Temra cells") +
  theme_classic() +
  ggtheme() +
  xlab("")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 6)

# Mean score per (Condition, Patient_Time), shown per condition.
data.frame(
  Score = cd4_l3_temra$score_tr356,
  Patient_ID = cd4_l3_temra$Patient_ID,
  Patient_Time = cd4_l3_temra$Patient_Time,
  Disease = cd4_l3_temra$Disease,
  Condition = cd4_l3_temra$Condition
) %>%
  group_by(Condition, Patient_Time) %>%
  summarize(Score = mean(Score)) %>%
  ggplot(aes(x = Condition, y = Score)) +
  geom_violin() +
  geom_point() +
  stat_compare_means(comparisons = list(c(1, 2), c(2, 3))) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score cd4 Temra cells") +
  theme_classic() +
  ggtheme() +
  xlab("")

In [None]:
# Per-cell scores (no averaging), shown per condition.
data.frame(
  Score = cd4_l3_temra$score_tr356,
  Patient_ID = cd4_l3_temra$Patient_ID,
  Patient_Time = cd4_l3_temra$Patient_Time,
  Disease = cd4_l3_temra$Disease,
  Condition = cd4_l3_temra$Condition
) %>%
  ggplot(aes(x = Condition, y = Score)) +
  geom_violin() +
  stat_compare_means(comparisons = list(c(1, 2), c(2, 3))) +
  stat_summary(fun = "mean", geom = "crossbar", width = 0.5, colour = "red") +
  ggtitle("TR3-56 score cd4 Temra cells") +
  theme_classic() +
  ggtheme() +
  xlab("")

## GZMK in CD4 Temra

In [None]:
genes <- rownames(cd4_l3_temra@assays$RNA)

In [None]:
# Drop every gene whose symbol matches at least one of the patterns below
# (prefixes such as MT / MIR / IG[KLH] / RP[LS], symbols containing "." or
# "-", and symbols containing LINC, HNRNP, TTTY, ORF or orf).
# NOTE(review): "^MT" matches any symbol that starts with MT, not only
# "MT-" genes - confirm this is intended.
genes_filt <- genes[!grepl(
  pattern = paste(
    c(
      "^MT", "\\.", "LINC", "^MIR", "HNRNP", "^IG[KLH]", "^RP[LS]",
      "\\-", "TTTY", "ORF", "orf"
    ),
    collapse = "|"
  ),
  x = genes
)]

In [None]:
# Average RNA expression of the filtered genes per Sample_ID, returned as a
# Seurat object with one column per sample.
aggexp_cd4 <- AverageExpression(
  cd4_l3_temra,
  assay = "RNA",
  features = genes_filt,
  group.by = c("Sample_ID"),
  return.seurat = TRUE
)

In [None]:
# One metadata row per sample (Condition, Condition2, Age, Sex).
md_cd4 <- cd4_l3_temra@meta.data %>%
  group_by(Sample_ID, Condition, Condition2, Age, Sex) %>%
  tally() %>%
  dplyr::select(-n) %>%
  ungroup() %>%
  mutate(Sample_ID = as.factor(Sample_ID))

# NOTE(review): the "g" prefix presumably mirrors how the averaged object
# names its samples - confirm against aggexp_cd4@meta.data.
md_cd4$Sample_ID <- paste0("g", md_cd4$Sample_ID)


# No `by` is given, so the join uses every column name the two tables share;
# the column names of the averaged object are then put back as row names.
md_cd4 <- aggexp_cd4@meta.data %>%
  left_join(md_cd4)
rownames(md_cd4) <- colnames(aggexp_cd4)

In [None]:
aggexp_cd4@meta.data <- md_cd4

In [None]:
# Per-sample average GZMK by Condition (default slot, then "data" slot).
VlnPlot(aggexp_cd4, features = "GZMK", group.by = "Condition")

In [None]:
VlnPlot(aggexp_cd4, features = "GZMK", group.by = "Condition", slot = "data")

# DE genes heatmap avg.

## Heatmap

In [None]:
# Reload the full filtered CD4 object from disk; metadata columns added to it
# earlier in this session are not part of the reloaded object unless they
# were saved.
cd4_l1_full_filt <- readRDS(
  file = "../data/processed/L1/cd4_l1_full_filt.rds"
)

### All cells

In [None]:
# Space-separated grouping labels: patient + time + disease, and
# disease + time.
cd4_l1_full_filt$Patient_Time_Disease <- paste(
  cd4_l1_full_filt$Patient_ID,
  cd4_l1_full_filt$Time,
  cd4_l1_full_filt$Disease
)

In [None]:
cd4_l1_full_filt$Disease_time <- paste(
  cd4_l1_full_filt$Disease,
  cd4_l1_full_filt$Time
)

In [None]:
# Average RNA expression per disease/time group over all cells of
# experiments 16, 18, 19 and 20.
avgexp <- AverageExpression(
  subset(
    cd4_l1_full_filt,
    Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ),
  assay = "RNA",
  group.by = "Disease_time",
  return.seurat = FALSE
)

In [None]:
# Genes shown in the "all cells" heatmap. Rows are selected with
# `rownames(...) %in% genes`, so a misspelled symbol is dropped without any
# warning; "NFKBIA" was previously misspelled as "NKFBIA".
genes <- c("PCBP2", "PCBP1",
           "CX3CR1", "TNF", "GZMB", "GZMA", "PRF1", "NKG7", "GNLY", "CCL5", "CST7",
           "BTG1", "SELL", "IL7R", "CCR7", "BTG2", "SLAMF6", "LEF1",
           "TNFAIP3", "TSC22D3", "NFKBIA", "DUSP1")

In [None]:
# Keep only the selected genes; rows stay in the order of the expression
# matrix, and `avgexp` is replaced by the reduced matrix.
avgexp <- avgexp$RNA[rownames(avgexp$RNA) %in% genes, ]

In [None]:
library(pheatmap)

options(repr.plot.width = 4, repr.plot.height = 9)

# Row-scaled heatmap with clustered genes and fixed column order.
pheatmap(
  avgexp,
  main = "",
  scale = "row",
  cluster_rows = TRUE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 9,
  width = 9,
  height = 3.3
)

### Average of patient

In [None]:
# Two-step average: first per patient/time/disease, then per disease/time,
# so that every patient sample contributes one value to the group average.
cd4_l1_full_filt$Patient_Time_Disease <- paste(
  cd4_l1_full_filt$Patient_ID,
  cd4_l1_full_filt$Time,
  cd4_l1_full_filt$Disease
)

In [None]:
avgexp <- AverageExpression(
  subset(
    cd4_l1_full_filt,
    Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ),
  assay = "RNA",
  group.by = "Patient_Time_Disease",
  return.seurat = TRUE
)

In [None]:
# Recover the three fields from the column names of the averaged object.
# NOTE(review): splitting on " " assumes none of the three fields contains a
# space - confirm.
avgexp$Patient_Time_Disease <- colnames(avgexp)
avgexp@meta.data <- avgexp@meta.data %>%
  separate(
    Patient_Time_Disease,
    into = c("Patient", "Time", "Disease"),
    sep = " ",
    remove = FALSE
  )

In [None]:
avgexp$Disease_Time <- paste(avgexp$Disease, avgexp$Time)

In [None]:
avgexp2 <- AverageExpression(
  avgexp,
  group.by = "Disease_Time",
  return.seurat = FALSE
)

In [None]:
# Display order of the heatmap rows, written out in final order.
genes4 <- c(
  "LEF1", "BACH2", "NELL2", "TCF7", "CXCR4", "ZFP36L2", "IL7R", "KLF2",
  "CCR7", "SELL",
  "IFNG", "TBX21", "TNF", "CCL5", "CX3CR1", "GZMA", "GNLY", "CST7", "GZMB",
  "NKG7", "PRF1", "TNFRSF9",
  "TNFAIP3", "DUSP1", "TSC22D3", "NFKBIA", "DDIT4", "INPP4B",
  "IFIT3", "IFI6", "STAT1", "MX1", "ISG15", "IFI44L", "OASL",
  "BTN3A2", "BTN3A3", "HLA-C", "HLA-DQA1", "HLA-DQB1"
)

In [None]:
avgexp3 <- avgexp2$RNA[rownames(avgexp2$RNA) %in% genes4, ]

In [None]:
options(repr.plot.width = 4, repr.plot.height = 9)

# Rows are put into the `genes4` order with match(); a gene of `genes4` that
# is absent from the matrix gives an NA index.
pheatmap(
  avgexp3[match(genes4, rownames(avgexp3)), ],
  main = "",
  scale = "row",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 9,
  width = 9,
  height = 3.3
)

In [None]:
# Make sure the output folder for the heatmap PDFs exists.
# recursive = TRUE also creates missing parent folders; showWarnings = FALSE
# keeps a re-run of this cell quiet when the folder is already there.
dir.create("../figures/heatmaps/", showWarnings = FALSE, recursive = TRUE)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 9)

# Same `genes4` heatmap, written to a 3.3 x 9 inch PDF.
pheatmap(
  avgexp3[match(genes4, rownames(avgexp3)), ],
  main = "",
  scale = "row",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 9,
  filename = "../figures/heatmaps/fig2_cd4.pdf",
  width = 3.3,
  height = 9
)

# All clusters

In [None]:
# Genes (and row order) of the per-cluster heatmaps below. The later cells
# select rows with `%in% genes` and order them with match(genes, ...), so a
# misspelled symbol ends up as an NA row; "EPSTI1" was previously misspelled
# as "EPSTI".
genes <- c("PCBP1", "PCBP2", "IL7R", "LEF1", "TCF7", "CCR7", "SELL", "BACH2", "NELL2", "PRF1",
           "NKG7", "GZMB", "CST7", "GNLY", "CX3CR1", "CCL5", "TNF", "KLRG1", "TBX21", "IFI44L",
           "ISG15", "MX1", "EPSTI1", "OAS1", "TXNIP", "LY6E", "DUSP1", "TSC22D3", "NFKBIA", "TNFAIP3")

In [None]:
# Two-step average per cluster: first per patient/time x disease x level-2
# annotation, then per disease/time/cluster. "..." is the field separator
# that is split on again below.
cd4_l1_full_filt$Patient_Time_Disease_CLuster <- paste(
  cd4_l1_full_filt$Patient_Time,
  cd4_l1_full_filt$Disease,
  cd4_l1_full_filt$annotations_l2,
  sep = "..."
)

In [None]:
avgexp <- AverageExpression(
  subset(
    cd4_l1_full_filt,
    Experiment_ID %in% c("Exp16", "Exp18", "Exp19", "Exp20")
  ),
  assay = "RNA",
  group.by = "Patient_Time_Disease_CLuster",
  return.seurat = TRUE
)

In [None]:
colnames(avgexp) %>% head()

In [None]:
# Recover the three "..."-separated fields from the column names.
avgexp$Patient_Time_Disease_CLuster <- colnames(avgexp)
avgexp@meta.data <- avgexp@meta.data %>%
  separate(
    Patient_Time_Disease_CLuster,
    into = c("Patient_Time", "Disease", "Cluster"),
    sep = "\\.\\.\\.",
    remove = FALSE
  )

In [None]:
# Patient_Time is itself "<patient> <time>".
avgexp@meta.data <- avgexp@meta.data %>%
  separate(
    Patient_Time,
    into = c("Patient", "Time"),
    sep = " ",
    remove = FALSE
  )
avgexp$Disease_Time <- paste(avgexp$Disease, avgexp$Time)

In [None]:
avgexp$Disease_Time_Cluster <- paste(
  avgexp$Disease,
  avgexp$Time,
  avgexp$Cluster
)

In [None]:
avgexp$Disease_Time_Cluster %>% table()

In [None]:
avgexp2 <- AverageExpression(
  avgexp,
  group.by = "Disease_Time_Cluster",
  return.seurat = FALSE
)

In [None]:
avgexp3 <- avgexp2$RNA[rownames(avgexp2$RNA) %in% genes, ]

In [None]:
# Check of the row order used below; an NA here marks a gene of `genes` that
# is not in the matrix.
rownames(avgexp3)[match(genes, rownames(avgexp3))]

In [None]:
options(repr.plot.width = 8, repr.plot.height = 9)

# All disease/time/cluster groups, rows in `genes` order.
pheatmap(
  avgexp3[match(genes, rownames(avgexp3)), ],
  main = "",
  scale = "row",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 9,
  width = 9,
  height = 3.3
)

In [None]:
# Only columns 1, 11 and 21.
# NOTE(review): the columns are picked by position; presumably they are the
# same cluster in three disease/time groups - confirm with colnames(avgexp3).
pheatmap(
  avgexp3[match(genes, rownames(avgexp3)), c(1, 11, 21)],
  main = "",
  scale = "row",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 9,
  width = 9,
  height = 3.3
)

In [None]:
avgexp

In [None]:
# Group the averaged samples by disease/time for the violin plot below.
Idents(avgexp) <- avgexp$Disease_Time

In [None]:
VlnPlot(avgexp, features = "IFI44L")

In [None]:
# Export the current `avgexp3` matrix (genes x disease/time/cluster groups).
write.csv(avgexp3, file = "../tables/avg_heatmap/cd4_avg_by_patient.csv")

In [None]:
# Second gene panel and row order for the DE-gene heatmap.
genes2 <- c(
  "PCBP2", "PCBP1", "CXCR4", "FOXO1", "IL7R", "LEF1", "CCR7", "SELL",
  "DUSP1", "TSC22D3", "NFKBIA", "TNFAIP3",
  "IFI44L", "ISG15", "MX1", "EPSTI1",
  "PRF1", "NKG7", "GZMB", "CST7", "GNLY", "GZMA", "CX3CR1", "CCL5", "TNF"
)

In [None]:
# `avgexp3` is replaced by the `genes2` subset from here on.
avgexp3 <- avgexp2$RNA[rownames(avgexp2$RNA) %in% genes2, ]

In [None]:
# Drawn in the notebook.
pheatmap(
  avgexp3[match(genes2, rownames(avgexp3)), ],
  main = "",
  scale = "row",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 9,
  width = 5,
  height = 10
)

In [None]:
options(repr.plot.width = 4, repr.plot.height = 9)

# Same heatmap, written to a 5 x 10 inch PDF.
pheatmap(
  avgexp3[match(genes2, rownames(avgexp3)), ],
  main = "",
  scale = "row",
  cluster_rows = FALSE,
  cluster_cols = FALSE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 9,
  filename = "../figures/heatmaps/de_genes_cd4_2.pdf",
  width = 5,
  height = 10
)

## Both heatmaps

In [None]:
# Combine the CD4 averages with the CD8 averages exported by another
# analysis, and draw one heatmap over both.
# NOTE(review): `avgexp3` is whatever the last cell assigned (the `genes2`
# subset if the notebook ran top to bottom), while the CD4 CSV written
# earlier held the `genes` subset - confirm that the CD8 table has the same
# genes in the same order (the comparison two cells below only prints the
# result, it does not stop on a mismatch).
cd4_avg_by_patient <- avgexp3

In [None]:
cd8_avg_by_patient <- read_csv(
  file = "../tables//avg_heatmap/cd8_avg_by_patient.csv"
)

In [None]:
avgexp3

In [None]:
# Element-wise check that the gene column of the CD8 table matches the CD4
# row names.
cd8_avg_by_patient[["...1"]] == rownames(cd4_avg_by_patient)

In [None]:
# Drop the gene-name column so that only numeric columns remain.
cd8_avg_by_patient[["...1"]] <- NULL

In [None]:
colnames(cd8_avg_by_patient) <- paste("CD8", colnames(cd8_avg_by_patient))

In [None]:
colnames(cd4_avg_by_patient) <- paste("CD4", colnames(cd4_avg_by_patient))

In [None]:
# NOTE(review): the name `all` shadows base::all() for the rest of the
# session; it is kept because later cells refer to it.
all <- cbind(cd8_avg_by_patient, cd4_avg_by_patient)

In [None]:
all

In [None]:
# Row-scaled heatmap with both genes and groups clustered.
pheatmap(
  all,
  main = "",
  scale = "row",
  cluster_rows = TRUE,
  cluster_cols = TRUE,
  color = colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50),
  border_color = "white",
  fontsize = 9,
  width = 9,
  height = 3.3
)