In [None]:
source("diabetes_analysis_v06.R")

In [None]:
options(future.globals.maxSize = 3e+09)

In [None]:
library(BPCells)
library(ggrepel)
# Raise the `future` limit on the total size of globals exported to workers
# (value is in bytes, i.e. roughly 3 GB here).
# set this option when analyzing large datasets
options(future.globals.maxSize = 3e+09)

In [None]:
plan("multisession")

In [None]:
library(pheatmap)

In [None]:
library(STACAS)

# Loading and preprocessing of the data

In [None]:
mat <- ReadParseBio("../../../DATA_scRNAseq/Analysis of previously published data/081_ParseBio_1M_Diabetes/Raw/")

In [None]:
cell_metadata <- read.csv("../../../DATA_scRNAseq/Analysis of previously published data/081_ParseBio_1M_Diabetes/Raw/cell_metadata.csv",
                         row.names = 1)

In [None]:
# Build the Seurat object from the count matrix and the per-cell metadata read
# above. min.cells / min.features are Seurat's inclusion thresholds: features
# must be detected in at least 100 cells, and cells must have at least 100
# detected features.
# NOTE(review): names.field = 0 presumably disables parsing of the sample
# identity from the barcode — confirm this is the intended setting.
pbmc <- CreateSeuratObject(mat, min.cells = 100, min.features = 100,
names.field = 0, meta.data = cell_metadata)

In [None]:
rm(mat)
rm(cell_metadata)
gc()

In [None]:
# Store the barcode as a metadata column; later cells of the notebook match
# on `barcode` when transferring annotations between objects.
pbmc$barcode <- colnames(pbmc)

# Randomly subsample up to 200,000 cells. The seed is fixed so the same cells
# are drawn on every run, and the sample size is capped at the number of cells
# available so sample() cannot fail on a smaller object.
set.seed(42)
pbmc <- subset(pbmc, barcode %in% sample(colnames(pbmc), size = min(200000, ncol(pbmc))))

In [None]:
# First-pass preprocessing and clustering of the subsampled object.
#
# Variable features are selected BEFORE scaling: ScaleData() scales the
# variable features by default and falls back to every feature when none have
# been selected yet. Scaling first therefore built a dense scaled matrix for
# all genes, although RunPCA() only uses the variable features.
pbmc <- NormalizeData(pbmc, verbose = FALSE)
pbmc <- FindVariableFeatures(pbmc, nfeatures = 1000, verbose = FALSE)
pbmc <- ScaleData(pbmc, verbose = FALSE)

# NOTE(review): RunPCA() is documented with `npcs`, not `dims`; the argument is
# kept as written — confirm whether it has any effect.
pbmc <- RunPCA(pbmc, dims = 1:12)
pbmc <- RunUMAP(pbmc, reduction = "pca", dims = 1:12)

# Graph-based clustering on the first 12 principal components.
pbmc <- FindNeighbors(pbmc, dims = 1:12)
pbmc <- FindClusters(pbmc, resolution = 0.7)
print("CLustering done!")

# Checkpoint the clustered object.
saveRDS(pbmc, "../../../DATA_scRNAseq//Analysis of previously published data/081_ParseBio_1M_Diabetes/241020_ParseBio_full_200k.rds")

In [None]:
DimPlot(pbmc)

In [None]:
FeaturePlot(pbmc, features = c("CD14","MS4A1","TRAC","LCK"), ncol = 4)

In [None]:
FeaturePlot(pbmc, features = c("CD3D","CD8A","CD4","NCR1"), ncol = 4)

In [None]:
FeaturePlot(pbmc, features = c("MKI67","NCR1","IL7R","CD4"), ncol = 4)

In [None]:
DimPlot(pbmc, label = T, label.size = 10)

# Subsetting T cells

In [None]:
# Keep only the hand-picked clusters from the previous clustering (the T-cell
# clusters, per the section heading) and re-run the workflow on them.
pbmc  <- subset(pbmc, seurat_clusters %in% c(0,2,3,4,6,9,12,18))

# Plot the retained cells on the existing embedding, before it is recomputed.
DimPlot(pbmc, label = T, label.size = 10)

# NOTE(review): ScaleData() runs before FindVariableFeatures() here. ScaleData()
# defaults to the variable features present when it is called, so the scaled
# features may not match the ones selected on the following line. Consider
# swapping the two calls. Cluster IDs that are hard-coded later in the notebook
# were chosen from the output of this ordering and would need to be re-derived.
     pbmc <- NormalizeData(pbmc, verbose = FALSE)
        pbmc <- ScaleData(pbmc, verbose = FALSE)
        pbmc <- FindVariableFeatures(pbmc, nfeatures = 1000, verbose = FALSE)

pbmc <- RunPCA(pbmc, dims = 1:12)
pbmc <- RunUMAP(pbmc, reduction = "pca", dims = 1:12)
  

# Graph-based clustering on the first 12 principal components.
pbmc <- FindNeighbors(pbmc, dims = 1:12)
pbmc <- FindClusters(pbmc, resolution = 0.7)
            print("CLustering done!")

In [None]:
saveRDS(pbmc, paste0("../../../DATA_scRNAseq//Analysis of previously published data/081_ParseBio_1M_Diabetes/241020_ParseBio_200k_filtT.rds"))

In [None]:
pbmc  <- readRDS("../../../DATA_scRNAseq//Analysis of previously published data/081_ParseBio_1M_Diabetes/241020_ParseBio_200k_filtT.rds")

In [None]:
DimPlot(pbmc)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 4)
FeaturePlot(pbmc, features = c("FOXP3","CD8A","CD4","NCR1"), max.cutoff = c(2,2,2,2), ncol = 4)

In [None]:
# Marker overview on the T-cell embedding.
FeaturePlot(pbmc, features = c("MKI67", "NCR1", "IL7R", "CD4"), ncol = 4)

# "RORC" is the symbol this notebook uses for the gene in its other feature
# plots; the previous "RORG" spelling is not used anywhere else in the file.
FeaturePlot(pbmc, features = c("FOXP3", "GATA3", "NCAM1", "RORC"), ncol = 4)

In [None]:
DimPlot(pbmc, group.by = "sample")

In [None]:
pbmc$Disease  <- substr(pbmc$sample, 1,1)

In [None]:
DimPlot(pbmc, group.by = "Disease")

In [None]:
Idents(pbmc)  <- pbmc$Disease

In [None]:
mrk  <- FindAllMarkers(pbmc, only.pos = T)

In [None]:
mrk  %>% dplyr::filter(cluster == "H" & !grepl(gene, pattern = "^MT") &
                      grepl(gene, pattern = "GZMB"))

In [None]:
mrk  %>% dplyr::filter(cluster == "H" & !grepl(gene, pattern = "^MT") &
                      grepl(gene, pattern = "CCL5"))

In [None]:
mrk  %>% dplyr::filter(cluster == "H" & !grepl(gene, pattern = "^MT") &
                      grepl(gene, pattern = "GNLY"))

In [None]:
mrk  %>% dplyr::filter(cluster == "D" & !grepl(gene, pattern = "^MT"))

In [None]:
# Differential expression between identities "D" and "H" (Idents were set to
# the Disease column above). All pre-filters are switched off (thresholds set
# to -Inf, only.pos = F, min.cells.feature = 1), presumably so that every gene
# receives a fold change and p-value for the volcano plot that follows.
# NOTE(review): `return.thresh` is documented as a FindAllMarkers() argument;
# confirm it has any effect when passed to FindMarkers().
mrk_fc  <- FindMarkers(pbmc, `ident.1` = "D", `ident.2` = "H", 
                       logfc.threshold = -Inf, min.pct = -Inf, 
                       min.diff.pct = -Inf,
                                   only.pos = F,  
                                   min.cells.feature = 1, return.thresh = 1)

In [None]:
mrk_fc$gene  <- rownames(mrk_fc)

In [None]:
mrk_fc  %>% dplyr::filter(!grepl(gene, pattern = "^MT"))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\."))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\-"))

In [None]:
mrk_fc  %>% dplyr::filter(!grepl(gene, pattern = "^MT"))  %>% 
dplyr::filter(p_val_adj<0.5)  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\."))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\-"))  %>% 
mutate(label = ifelse(gene %in% c("IL7R",
"LEF1",
"TCF7",
"CCR7",
"SELL",
"BACH2",
"NELL2",
"PRF1",
"NKG7",
"GZMB",
"CST7",
"GNLY",
"CX3CR1",
"CCL5",
"TNF",
"KLRG1",
"TBX21"), gene, NA_character_))  %>% 
  ggplot(aes(x = avg_log2FC, y = -log10(p_val_adj), label = label)) + 
  geom_point(color = "white") + 
        theme_minimal() +
        geom_text_repel() +
        #scale_color_manual(values=c("blue", "black", "red")) +
        geom_vline(xintercept=c(-0.6, 0.6), col="red") +
        geom_hline(yintercept=-log10(0.05), col="red")

In [None]:
pbmc$sample  %>% table

In [None]:
avg_exp  <- AverageExpression(pbmc, return.seurat = F, group.by = "sample")

In [None]:
# Reshape the per-sample average expression matrix into long format: one row
# per gene/sample pair, plus a `disease` column holding the first character of
# the sample name.
df <- as.data.frame(avg_exp$RNA) %>%
  rownames_to_column(var = "gene") %>%
  pivot_longer(cols = !gene, names_to = "sample", values_to = "value") %>%
  mutate(disease = substr(sample, start = 1, stop = 1))

In [None]:
options(repr.plot.width = 8, repr.plot.height = 3)
# Per-sample average expression of selected genes, split by disease group:
# box plot plus median bar, with one jittered point per sample.
df %>%
  dplyr::filter(gene %in% c("GZMB", "GNLY", "CX3CR1", "CCL5")) %>%
  ggplot(aes(x = disease, y = value)) +
  facet_wrap(~gene, ncol = 7, scales = "free") +
  geom_boxplot(outlier.shape = NA, aes(fill = disease), alpha = 0.5) +
  geom_crossbar(stat = "summary", fun = "median", alpha = 0.6, width = 0.7) +
  # NOTE(review): dotsize = 0 makes this layer invisible; kept as in the
  # original, consider dropping it.
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # `binaxis` / `stackdir` are geom_dotplot() parameters; geom_jitter() ignored
  # them with a warning, so they are no longer passed here.
  geom_jitter(aes(color = disease),
              position = position_jitter(width = 0.1, height = 0.05),
              size = 2.5) +
  theme_classic()

In [None]:
options(repr.plot.width = 8, repr.plot.height = 5)
# Per-sample average expression of selected genes, split by disease group,
# with a group-comparison p-value added by ggpubr::stat_compare_means().
df %>%
  dplyr::filter(gene %in% c("CXCR4", "ZFP36L2", "TSC22D3")) %>%
  ggplot(aes(x = disease, y = value)) +
  facet_wrap(~gene, ncol = 7) +
  geom_boxplot(outlier.shape = NA, aes(fill = disease), alpha = 0.5) +
  geom_crossbar(stat = "summary", fun = "median", alpha = 0.6, width = 0.7) +
  # NOTE(review): dotsize = 0 makes this layer invisible; kept as in the
  # original, consider dropping it.
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # `binaxis` / `stackdir` are geom_dotplot() parameters; geom_jitter() ignored
  # them with a warning, so they are no longer passed here.
  geom_jitter(aes(color = disease),
              position = position_jitter(width = 0.1, height = 0.05),
              size = 2.5) +
  theme_classic() +
  ggpubr::stat_compare_means()

In [None]:
options(repr.plot.width = 4, repr.plot.height = 3)
# Per-sample average expression of BACH2 and NELL2, split by disease group,
# with a group-comparison p-value added by ggpubr::stat_compare_means().
df %>%
  dplyr::filter(gene %in% c("BACH2", "NELL2")) %>%
  ggplot(aes(x = disease, y = value)) +
  facet_wrap(~gene, ncol = 7) +
  geom_boxplot(outlier.shape = NA, aes(fill = disease), alpha = 0.5) +
  geom_crossbar(stat = "summary", fun = "median", alpha = 0.6, width = 0.7) +
  # NOTE(review): dotsize = 0 makes this layer invisible; kept as in the
  # original, consider dropping it.
  geom_dotplot(binaxis = "y", stackdir = "center", dotsize = 0) +
  # `binaxis` / `stackdir` are geom_dotplot() parameters; geom_jitter() ignored
  # them with a warning, so they are no longer passed here.
  geom_jitter(aes(color = disease),
              position = position_jitter(width = 0.1, height = 0.05),
              size = 2.5) +
  theme_classic() +
  ggpubr::stat_compare_means()

In [None]:
Idents(pbmc)  <- pbmc$seurat_clusters

In [None]:
options(repr.plot.width = 7, repr.plot.height = 5)
DimPlot(pbmc, label  = T, label.size = 10, repel = T)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 4)
FeaturePlot(pbmc, features = c("FOXP3","CD8A","CD4","NCR1"), max.cutoff = c(2,2,2,2), ncol = 4)

# Separating CD4 and CD8 T cells

In [None]:
which(rownames(pbmc@assays$RNA) == "CD4")

In [None]:
which(rownames(pbmc@assays$RNA) == "CD8A")

In [None]:
# Label every cell by whether raw counts for CD4 and/or CD8A were detected.
# The rows are looked up by gene name instead of by fixed row position, so the
# call stays correct if the feature set or its ordering changes, and it fails
# loudly ("subscript out of bounds") if either gene is missing.
has_cd4 <- pbmc@assays$RNA$counts["CD4", ] > 0
has_cd8a <- pbmc@assays$RNA$counts["CD8A", ] > 0

# "Both": both detected; "CD4" / "CD8": only that gene detected;
# "Unknown": neither detected.
pbmc$cd4_or_cd8 <- ifelse(has_cd4 & has_cd8a, "Both",
                          ifelse(has_cd4, "CD4",
                                 ifelse(has_cd8a, "CD8", "Unknown")))

In [None]:
pbmc$cd4_or_cd8  %>% table

In [None]:
DimPlot(pbmc, group.by = "cd4_or_cd8")

In [None]:
options(future.globals.maxSize = 5e+09)

In [None]:
pbmc <- FindNeighbors(pbmc, dims = 1:12)
pbmc <- FindClusters(pbmc, resolution = 0.5)
options(repr.plot.width = 7, repr.plot.height = 5)
DimPlot(pbmc, label  = T, label.size = 10, repel = T)

In [None]:
options(repr.plot.width = 24, repr.plot.height = 12)

FeaturePlot(pbmc, features = c("CD8A"), split.by = "seurat_clusters", max.cutoff = 2) + plot_layout(ncol = 5, nrow = 3)

In [None]:
options(repr.plot.width = 24, repr.plot.height = 12)

FeaturePlot(pbmc, features = c("CD4"), split.by = "seurat_clusters", max.cutoff = 2) + plot_layout(ncol = 5, nrow = 3)


### Reclustering cluster 10

In [None]:
sub  <- subset(pbmc, seurat_clusters == 10)

In [None]:
     # Re-run normalization, scaling, feature selection, PCA, UMAP and the
     # neighbour graph on the cluster-10 subset.
     # NOTE(review): ScaleData() runs before FindVariableFeatures(). ScaleData()
     # defaults to the variable features present when it is called, so the
     # scaled features may not match the ones selected on the following line.
     # Consider swapping the two calls. The sub-cluster IDs hard-coded further
     # down were chosen from the output of this ordering and would need to be
     # re-derived.
     sub <- NormalizeData(sub, verbose = FALSE)
        sub <- ScaleData(sub, verbose = FALSE)
        sub <- FindVariableFeatures(sub, nfeatures = 1000, verbose = FALSE)

sub <- RunPCA(sub, dims = 1:12)
sub <- RunUMAP(sub, reduction = "pca", dims = 1:12)
  

sub <- FindNeighbors(sub, dims = 1:12)

In [None]:
cl10 <- FindClusters(sub, resolution = 0.7)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cl10, label = T, label.size = 10)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

FeaturePlot(cl10, features = c("CD8A"),  max.cutoff = 2)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

FeaturePlot(cl10, features = c("CD4"),  max.cutoff = 2)

In [None]:
options(repr.plot.width = 24, repr.plot.height = 9)

FeaturePlot(cl10, features = c("CD8A"), split.by = "seurat_clusters", max.cutoff = 2) + plot_layout(ncol = 5, nrow = 2)


In [None]:
options(repr.plot.width = 24, repr.plot.height = 9)

FeaturePlot(cl10, features = c("CD4"), split.by = "seurat_clusters", max.cutoff = 2) + plot_layout(ncol = 5, nrow = 2)


In [None]:
cl10_cd4  <- subset(cl10, seurat_clusters %in% c(0,1,5))
cl10_cd8  <- subset(cl10, seurat_clusters %in% c(2,4,7))
cl10_nk  <- subset(cl10, seurat_clusters %in% c(3,6))

### Reclustering cluster 12

In [None]:
sub  <- subset(pbmc, seurat_clusters == 12)

In [None]:
    # Re-run normalization, scaling, feature selection, PCA, UMAP and
    # clustering on the cluster-12 subset; the result is stored in `cl12`.
    # NOTE(review): ScaleData() runs before FindVariableFeatures(). ScaleData()
    # defaults to the variable features present when it is called, so the
    # scaled features may not match the ones selected on the following line.
    # Consider swapping the two calls. The sub-cluster IDs hard-coded further
    # down were chosen from the output of this ordering and would need to be
    # re-derived.
    sub <- NormalizeData(sub, verbose = FALSE)
        sub <- ScaleData(sub, verbose = FALSE)
        sub <- FindVariableFeatures(sub, nfeatures = 1000, verbose = FALSE)

sub <- RunPCA(sub, dims = 1:12)
sub <- RunUMAP(sub, reduction = "pca", dims = 1:12)
  

sub <- FindNeighbors(sub, dims = 1:12)
cl12 <- FindClusters(sub, resolution = 0.7)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cl12)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cl12)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

FeaturePlot(cl12, features = c("CD8A"),  max.cutoff = 2)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

FeaturePlot(cl12, features = c("CD4"),  max.cutoff = 2)

In [None]:
options(repr.plot.width = 24, repr.plot.height = 5)

FeaturePlot(cl12, features = c("CD8A"), split.by = "seurat_clusters", max.cutoff = 2) + plot_layout(ncol = 5, nrow = 1)


In [None]:
options(repr.plot.width = 24, repr.plot.height = 5)

FeaturePlot(cl12, features = c("CD4"), split.by = "seurat_clusters", max.cutoff = 2) + plot_layout(ncol = 5, nrow = 1)


In [None]:
cl12_cd4  <- subset(cl12, seurat_clusters %in% c(0,2,3,4))
cl12_cd8  <- subset(cl12, seurat_clusters %in% c(1))

### Reclustering cluster 6

In [None]:
sub  <- subset(pbmc, seurat_clusters == 6)

In [None]:
    # Re-run normalization, scaling, feature selection, PCA, UMAP and the
    # neighbour graph on the cluster-6 subset.
    # NOTE(review): ScaleData() runs before FindVariableFeatures(). ScaleData()
    # defaults to the variable features present when it is called, so the
    # scaled features may not match the ones selected on the following line.
    # Consider swapping the two calls. The sub-cluster IDs hard-coded further
    # down were chosen from the output of this ordering and would need to be
    # re-derived.
    sub <- NormalizeData(sub, verbose = FALSE)
        sub <- ScaleData(sub, verbose = FALSE)
        sub <- FindVariableFeatures(sub, nfeatures = 1000, verbose = FALSE)

sub <- RunPCA(sub, dims = 1:12)
sub <- RunUMAP(sub, reduction = "pca", dims = 1:12)
  

sub <- FindNeighbors(sub, dims = 1:12)

In [None]:
cl6 <- FindClusters(sub, resolution = 0.8)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
DimPlot(cl6, label = T, label.size = 10)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

FeaturePlot(cl6, features = c("CD8A"),  max.cutoff = 2)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)

FeaturePlot(cl6, features = c("CD4"),  max.cutoff = 2)

In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)

FeaturePlot(cl6, features = c("CD8A"), split.by = "seurat_clusters", max.cutoff = 2) + plot_layout(ncol = 6, nrow = 2)


In [None]:
options(repr.plot.width = 20, repr.plot.height = 8)

FeaturePlot(cl6, features = c("CD4"), split.by = "seurat_clusters", max.cutoff = 2) + plot_layout(ncol = 6, nrow = 2)


In [None]:
cl6_cd4  <-  subset(cl6, seurat_clusters %in% c(0,1,3,5,6,7,9))
cl6_cd8  <-  subset(cl6, seurat_clusters %in% c(2,4,8,10))


In [None]:
md_pbmc  <- pbmc@meta.data

In [None]:
# Refine the count-based CD4/CD8 call. Cells already labelled "CD4" or "CD8"
# keep their label; cells labelled "Unknown" or "Both" are re-assigned from
# their cluster membership.
# case_when() uses the first rule that matches, in the order written:
#   1. whole clusters assigned directly to CD8, CD4 or NK_DN;
#   2. barcodes of the re-clustered subsets of clusters 6, 10 and 12.
# A cell that matches none of the rules receives NA.
md_pbmc <- md_pbmc  %>% mutate(cd4_or_cd8_2 = 
                                ifelse(cd4_or_cd8 == "Unknown" | cd4_or_cd8 == "Both",
                                  case_when(seurat_clusters %in% c(0,5,7) ~ "CD8",
                                            seurat_clusters %in% c(1,2,3,8,9) ~ "CD4",
                                            seurat_clusters %in% c(4,11) ~ "NK_DN",
                                            barcode %in% c(cl6_cd4$barcode, 
                                                           cl10_cd4$barcode,
                                                           cl12_cd4$barcode) ~ "CD4",
                                            barcode %in% c(cl6_cd8$barcode, 
                                                           cl10_cd8$barcode,
                                                           cl12_cd8$barcode) ~ "CD8",
                                            barcode %in% c(cl10_nk$barcode) ~ "NK_DN"),
                                   cd4_or_cd8))

In [None]:
md_pbmc$cd4_or_cd8  %>% table

In [None]:
md_pbmc$cd4_or_cd8_2  %>% table

In [None]:
pbmc$cd4_or_cd8_2  <- md_pbmc$cd4_or_cd8_2

In [None]:
options(repr.plot.width = 8, repr.plot.height = 6)

DimPlot(pbmc, group.by = "cd4_or_cd8_2")

In [None]:
saveRDS(pbmc, paste0("../../../DATA_scRNAseq//Analysis of previously published data/081_ParseBio_1M_Diabetes/241020_ParseBio_200k_filtT.rds"))

# Analysis of NK DN cells

In [None]:
# Re-run the workflow on the cells labelled "NK_DN" and keep the result as
# `nk_dn`.
sub <- subset(pbmc, cd4_or_cd8_2 == "NK_DN")
sub <- NormalizeData(sub, verbose = FALSE)
# Select the subset's own variable features first and scale afterwards.
# ScaleData() scales the variable features set at call time, so the previous
# order scaled the parent object's features and RunPCA() could only use the
# overlap with the newly selected ones.
# NOTE: cluster numbering may differ from objects produced with the old order.
sub <- FindVariableFeatures(sub, nfeatures = 1000, verbose = FALSE)
sub <- ScaleData(sub, verbose = FALSE)

sub <- RunPCA(sub, dims = 1:12)
sub <- RunUMAP(sub, reduction = "pca", dims = 1:12)

# Graph-based clustering on the first 12 principal components.
sub <- FindNeighbors(sub, dims = 1:12)
sub <- FindClusters(sub, resolution = 0.7)
print("CLustering done!")
nk_dn <- sub

In [None]:
DimPlot(nk_dn)

# Analysis of CD4 cells

In [None]:
options(future.globals.maxSize = 10e+09)

In [None]:
plan("sequential")

In [None]:
# Extract the cells labelled "CD4" and prepare them for PCA.
sub <- subset(pbmc, cd4_or_cd8_2 == "CD4")
sub <- NormalizeData(sub, verbose = FALSE)
# Select the subset's own variable features first and scale afterwards.
# ScaleData() scales the variable features set at call time, so the previous
# order scaled the parent object's features and RunPCA() could only use the
# overlap with the newly selected ones.
# NOTE: cluster numbering may differ from objects produced with the old order.
sub <- FindVariableFeatures(sub, nfeatures = 1000, verbose = FALSE)
sub <- ScaleData(sub, verbose = FALSE)

In [None]:
sub <- RunPCA(sub, dims = 1:10)
sub <- RunUMAP(sub, reduction = "pca", dims = 1:10)
  

sub <- FindNeighbors(sub, dims = 1:10)
sub <- FindClusters(sub, resolution = 0.5)
            print("Clustering done!")
cd4  <- sub

In [None]:
DimPlot(cd4)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
FeaturePlot(cd4, features = c("CD3D","CD8A","RORC","LGALS3",
                                         "GATA3","MKI67","ISG15","NCAM1",
                                         "TRGC1","TRDC","FOXP3","CTLA4",
                                         "IL4","IL5","NFKBIA","CD4"), ncol = 4)


# Analysis of CD8 cells

In [None]:
# Extract the cells labelled "CD8" and prepare them for PCA.
sub <- subset(pbmc, cd4_or_cd8_2 == "CD8")
sub <- NormalizeData(sub, verbose = FALSE)
# Select the subset's own variable features first and scale afterwards.
# ScaleData() scales the variable features set at call time, so the previous
# order scaled the parent object's features and RunPCA() could only use the
# overlap with the newly selected ones.
# NOTE: cluster numbering may differ from objects produced with the old order.
sub <- FindVariableFeatures(sub, nfeatures = 1000, verbose = FALSE)
sub <- ScaleData(sub, verbose = FALSE)

In [None]:
sub <- RunPCA(sub, dims = 1:10)
sub <- RunUMAP(sub, reduction = "pca", dims = 1:10)
  

sub <- FindNeighbors(sub, dims = 1:10)
sub <- FindClusters(sub, resolution = 0.5)
            print("CLustering done!")
cd8  <- sub

In [None]:
DimPlot(cd8)

In [None]:
options(repr.plot.width = 16, repr.plot.height = 16)
FeaturePlot(cd8, features = c("SELL","EOMES","TBX21","LGALS3",
                                         "IFNG","MKI67","ISG15","NCAM1",
                                         "TRGC1","TRDC","IKZF2","CTLA4",
                                         "IL4","IL5","NFKBIA","CD4"), ncol = 4)

In [None]:
saveRDS(cd4, "../../../DATA_scRNAseq//Analysis of previously published data/081_ParseBio_1M_Diabetes/241020_ParseBio_200k_CD4.rds")
saveRDS(cd8, "../../../DATA_scRNAseq//Analysis of previously published data/081_ParseBio_1M_Diabetes/241020_ParseBio_200k_CD8.rds")
saveRDS(nk_dn, "../../../DATA_scRNAseq//Analysis of previously published data/081_ParseBio_1M_Diabetes/241020_ParseBio_200k_NK_DN.rds")

# DE in whole dataset

In [None]:
# Pseudobulk: aggregate RNA counts per combination of cell-type label, sample
# and disease group, returned as a Seurat object.
bulk_pbmc <- AggregateExpression(
  pbmc,
  assays = "RNA",
  slot = "counts",
  group.by = c("cd4_or_cd8_2", "sample", "Disease"),
  return.seurat = TRUE
)

# Print a summary of the aggregated object.
bulk_pbmc

In [None]:
  # DESeq2 test on the pseudobulk profiles, comparing disease group "D"
  # against "H"; the gene symbol is copied from the row names into a column.
  Idents(bulk_pbmc) <- "Disease"
  de_markers <- FindMarkers(
    bulk_pbmc,
    ident.1 = "D",
    ident.2 = "H",
    test.use = "DESeq2",
    slot = "counts",
    verbose = FALSE
  )
  de_markers$gene <- rownames(de_markers)

In [None]:
  ggplot(de_markers, aes(avg_log2FC, -log10(p_val))) + geom_point(size = 0.5, alpha = 0.5) + theme_bw() +
      ylab("-log10(unadjusted p-value)") + geom_text_repel(aes(label = ifelse(p_val_adj < 0.01, gene,
      "")), colour = "red", size = 3)

In [None]:
de_markers  %>% 
dplyr::filter(!grepl(gene, pattern = "^MT"))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\."))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\-"))  %>% 
mutate(label = ifelse(gene %in% c("IL7R",
"LEF1",
"TCF7",
"CCR7",
"SELL",
"BACH2",
"NELL2",
"PRF1",
                                  "ZFP36L2",
                                  "CXCR4",
"NKG7",
"GZMB",
"CST7",
"GNLY",
"CX3CR1",
"CCL5",
"TNF",
"KLRG1",
"TBX21"), gene, NA_character_))  %>% 
ggplot(aes(avg_log2FC, -log10(p_val))) + geom_point(size = 0.5, alpha = 0.5) + theme_bw() +
      ylab("-log10(unadjusted p-value)") + geom_text_repel(aes(label = ifelse(p_val < 0.5, label,
      "")), colour = "red", size = 3)

In [None]:
# Build the ranked gene list for GSEA from the whole-dataset DE result.
# Genes whose symbol matches any of the patterns below are dropped; the rest
# are ordered by decreasing log2 fold change.
fc.df.deseq <- de_markers %>%
  dplyr::filter(
    !grepl("^MT", gene),
    !grepl("\\.", gene),
    !grepl("LINC", gene),
    !grepl("HNRNP", gene),
    !grepl("^MIR", gene),
    !grepl("orf", gene),
    !grepl("^RP[LS]", gene),
    !grepl("\\-", gene)
  ) %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)

# Named numeric vector: names are gene symbols, values are log2 fold changes.
ranks_pbmc <- deframe(fc.df.deseq)
ranks_pbmc

In [None]:
library(fgsea)
library(msigdbr)

In [None]:
fgseaRes <- fgsea(pathways = fgsea_sets_hs_c7, 
                 stats = ranks_pbmc)

In [None]:
fgseaRes  %>% arrange(pval)  %>% dplyr::filter(NES>0)

In [None]:
fgseaRes  %>% dplyr::filter(grepl(pathway, pattern = "NAIVE"))

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN"]],
               ranks_pbmc) + labs(title="GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN") +
ylim(c(-0.9,0.4))
#ggsave(filename = paste0("../figures/gsea/GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN.svg"), 
#       width = 9.5, height = 8, units = "cm")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP"]],
               ranks_pbmc) + labs(title="GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP") +
ylim(c(-0.9,0.4))
#ggsave(filename = paste0("../figures/gsea/GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP.svg"), 
#       width = 9.5, height = 8, units = "cm")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
# Enrichment curve for the whole-dataset ranking. This section previously
# plotted `ranks_cd8_t0`, which is only created later in the CD8 section; the
# ranking built in this section is `ranks_pbmc`.
plotEnrichment(fgsea_sets_hs_c7[["KAECH_NAIVE_VS_MEMORY_CD8_TCELL_DN"]],
               ranks_pbmc) +
  labs(title = "KAECH_NAIVE_VS_MEMORY_CD8_TCELL_DN") +
  ylim(c(-0.9, 0.4))

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
# Enrichment curve for the whole-dataset ranking. This section previously
# plotted `ranks_cd8_t0`, which is only created later in the CD8 section; the
# ranking built in this section is `ranks_pbmc`.
plotEnrichment(fgsea_sets_hs_c7[["KAECH_NAIVE_VS_MEMORY_CD8_TCELL_UP"]],
               ranks_pbmc) +
  labs(title = "KAECH_NAIVE_VS_MEMORY_CD8_TCELL_UP") +
  ylim(c(-0.9, 0.4))

# DE in subpopulations - heatmap

In [None]:
pbmc$Disease  <- substr(pbmc$sample,1,1)

In [None]:
pbmc$Disease_CellType  <- paste(pbmc$Disease, pbmc$cd4_or_cd8_2)

In [None]:
# Average RNA expression for each Disease x cell-type group, returned as a
# list of matrices rather than a Seurat object.
avgexp <- AverageExpression(
  pbmc,
  assay = "RNA",
  group.by = "Disease_CellType",
  return.seurat = FALSE
)

In [None]:
# Genes shown in the Disease x cell-type heatmap. Rows are later selected by
# matching against this vector, so a misspelled symbol is dropped silently.
# "NFKBIA" was previously misspelled as "NKFBIA" and therefore never matched.
genes <- c(
  "PCBP2", "PCBP1",
  "CX3CR1", "TNF", "GZMB", "GZMA", "PRF1", "NKG7", "GNLY", "CCL5", "CST7",
  "BTG1", "SELL", "IL7R", "CCR7", "BTG2", "SLAMF6", "LEF1",
  "TNFAIP3", "TSC22D3", "NFKBIA", "DUSP1"
)

In [None]:
avgexp  <- avgexp$RNA[which(rownames(avgexp$RNA) %in% genes),]

In [None]:
library(pheatmap)

options(repr.plot.width = 4, repr.plot.height = 9)
pheatmap(avgexp, main = "", scale = "row", cluster_cols = F, cluster_rows = T,
        color=colorRampPalette(c("dodgerblue", "grey95", "indianred2"))(50), 
         border_color = "white", width = 9, height = 3.3,
                  fontsize = 9)

## DE CD4

In [None]:
bulk_pbmc <- AggregateExpression(pbmc, return.seurat = T, slot = "counts", 
                                assays = "RNA", group.by = c("cd4_or_cd8_2", "sample", "Disease"))

In [None]:
bulk_pbmc

In [None]:
  cd4.bulk <- subset(bulk_pbmc, cd4_or_cd8_2 == "CD4")
  Idents(cd4.bulk) <- "Disease"
  de_markers <- FindMarkers(cd4.bulk, ident.1 = "D", ident.2 = "H", slot = "counts", test.use = "DESeq2",
      verbose = F)
  de_markers$gene <- rownames(de_markers)

In [None]:
  ggplot(de_markers, aes(avg_log2FC, -log10(p_val))) + geom_point(size = 0.5, alpha = 0.5) + theme_bw() +
      ylab("-log10(unadjusted p-value)") + geom_text_repel(aes(label = ifelse(p_val_adj < 0.01, gene,
      "")), colour = "red", size = 3)

In [None]:
de_markers  %>% 
dplyr::filter(!grepl(gene, pattern = "^MT"))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\."))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\-"))  %>% 
mutate(label = ifelse(gene %in% c("IL7R",
"LEF1",
"TCF7",
"CCR7",
"SELL",
"BACH2",
"NELL2",
"PRF1",
                                  "ZFP36L2",
                                  "CXCR4",
"NKG7",
"GZMB",
"CST7",
"GNLY",
"CX3CR1",
"CCL5",
"TNF",
"KLRG1",
"TBX21"), gene, NA_character_))  %>% 
ggplot(aes(avg_log2FC, -log10(p_val))) + geom_point(size = 0.5, alpha = 0.5) + theme_bw() +
      ylab("-log10(unadjusted p-value)") + geom_text_repel(aes(label = ifelse(p_val < 0.5, label,
      "")), colour = "red", size = 3)

In [None]:
# Build the ranked gene list for GSEA from the CD4 pseudobulk DE result.
# Genes whose symbol matches any of the patterns below are dropped; the rest
# are ordered by decreasing log2 fold change.
fc.df.deseq <- de_markers %>%
  dplyr::filter(
    !grepl("^MT", gene),
    !grepl("\\.", gene),
    !grepl("LINC", gene),
    !grepl("HNRNP", gene),
    !grepl("^MIR", gene),
    !grepl("orf", gene),
    !grepl("^RP[LS]", gene),
    !grepl("\\-", gene)
  ) %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)

# Named numeric vector: names are gene symbols, values are log2 fold changes.
ranks_cd4_t0 <- deframe(fc.df.deseq)
ranks_cd4_t0

In [None]:
library(fgsea)
library(msigdbr)

In [None]:
mrk_fc

In [None]:
fgseaRes <- fgsea(pathways = fgsea_sets_hs_c7, 
                 stats = ranks_cd4_t0)

In [None]:
fgseaRes  %>% arrange(pval)

In [None]:
fgseaRes  %>% dplyr::filter(grepl(pathway, pattern = "GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL"))

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN"]],
               ranks_cd4_t0) + labs(title="GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN") +
ylim(c(-0.9,0.4))
#ggsave(filename = paste0("../figures/gsea/GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN.svg"), 
#       width = 9.5, height = 8, units = "cm")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP"]],
               ranks_cd4_t0) + labs(title="GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP") +
ylim(c(-0.9,0.4))
#ggsave(filename = paste0("../figures/gsea/GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP.svg"), 
#       width = 9.5, height = 8, units = "cm")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["KAECH_NAIVE_VS_MEMORY_CD8_TCELL_DN"]],
               ranks_cd4_t0) + labs(title="KAECH_NAIVE_VS_MEMORY_CD8_TCELL_DN") +
ylim(c(-0.9,0.4))

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["KAECH_NAIVE_VS_MEMORY_CD8_TCELL_UP"]],
               ranks_cd4_t0) + labs(title="KAECH_NAIVE_VS_MEMORY_CD8_TCELL_UP") +
ylim(c(-0.9,0.4))

## DE CD8

In [None]:
bulk_pbmc <- AggregateExpression(pbmc, return.seurat = T, slot = "counts", 
                                assays = "RNA", group.by = c("cd4_or_cd8_2", "sample", "Disease"))

In [None]:
bulk_pbmc

In [None]:
  cd8.bulk <- subset(bulk_pbmc, cd4_or_cd8_2 == "CD8")
  Idents(cd8.bulk) <- "Disease"
  de_markers <- FindMarkers(cd8.bulk, ident.1 = "D", ident.2 = "H", slot = "counts", test.use = "DESeq2",
      verbose = F)
  de_markers$gene <- rownames(de_markers)

In [None]:
  ggplot(de_markers, aes(avg_log2FC, -log10(p_val))) + geom_point(size = 0.5, alpha = 0.5) + theme_bw() +
      ylab("-log10(unadjusted p-value)") + geom_text_repel(aes(label = ifelse(p_val_adj < 0.01, gene,
      "")), colour = "red", size = 3)

In [None]:
de_markers  %>% 
dplyr::filter(!grepl(gene, pattern = "^MT"))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\."))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\-"))  %>% 
mutate(label = ifelse(gene %in% c("IL7R",
"LEF1",
"TCF7",
"CCR7",
"SELL",
"BACH2",
"NELL2",
"PRF1",
                                  "ZFP36L2",
                                  "CXCR4",
"NKG7",
"GZMB",
"CST7",
"GNLY",
"CX3CR1",
"CCL5",
"TNF",
"KLRG1",
"TBX21"), gene, NA_character_))  %>% 
ggplot(aes(avg_log2FC, -log10(p_val))) + geom_point(size = 0.5, alpha = 0.5) + theme_bw() +
      ylab("-log10(unadjusted p-value)") + geom_text_repel(aes(label = ifelse(p_val < 0.5, label,
      "")), colour = "red", size = 3)

In [None]:
# Build the ranked gene list for GSEA from the CD8 pseudobulk DE result.
# Genes whose symbol matches any of the patterns below are dropped; the rest
# are ordered by decreasing log2 fold change.
fc.df.deseq <- de_markers %>%
  dplyr::filter(
    !grepl("^MT", gene),
    !grepl("\\.", gene),
    !grepl("LINC", gene),
    !grepl("HNRNP", gene),
    !grepl("^MIR", gene),
    !grepl("orf", gene),
    !grepl("^RP[LS]", gene),
    !grepl("\\-", gene)
  ) %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)

# Named numeric vector: names are gene symbols, values are log2 fold changes.
ranks_cd8_t0 <- deframe(fc.df.deseq)
ranks_cd8_t0

In [None]:
library(fgsea)
library(msigdbr)

In [None]:
fgseaRes <- fgsea(pathways = fgsea_sets_hs_c7, 
                 stats = ranks_cd8_t0)

In [None]:
fgseaRes  %>% arrange(pval)

In [None]:
fgseaRes  %>% dplyr::filter(grepl(pathway, pattern = "GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL"))

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN"]],
               ranks_cd8_t0) + labs(title="GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN") +
ylim(c(-0.9,0.4))
#ggsave(filename = paste0("../figures/gsea/GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_DN.svg"), 
#       width = 9.5, height = 8, units = "cm")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP"]],
               ranks_cd8_t0) + labs(title="GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP") +
ylim(c(-0.9,0.4))
#ggsave(filename = paste0("../figures/gsea/GSE11057_NAIVE_VS_EFF_MEMORY_CD4_TCELL_UP.svg"), 
#       width = 9.5, height = 8, units = "cm")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["KAECH_NAIVE_VS_MEMORY_CD8_TCELL_DN"]],
               ranks_cd8_t0) + labs(title="KAECH_NAIVE_VS_MEMORY_CD8_TCELL_DN") +
ylim(c(-0.9,0.4))

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["KAECH_NAIVE_VS_MEMORY_CD8_TCELL_UP"]],
               ranks_cd8_t0) + labs(title="KAECH_NAIVE_VS_MEMORY_CD8_TCELL_UP") +
ylim(c(-0.9,0.4))

## DE NK

In [None]:
  # DESeq2 test on the pseudobulk profiles of the NK/double-negative group,
  # comparing disease group "D" against "H".
  # NOTE(review): the per-cell label is written "NK_DN" elsewhere in this
  # notebook, while the pseudobulk object is filtered on "NK-DN" — presumably
  # because AggregateExpression() replaces underscores with dashes in group
  # labels; confirm against the values in bulk_pbmc$cd4_or_cd8_2.
  nk.bulk <- subset(bulk_pbmc, cd4_or_cd8_2 == "NK-DN")
  Idents(nk.bulk) <- "Disease"
  de_markers <- FindMarkers(nk.bulk, ident.1 = "D", ident.2 = "H", slot = "counts", test.use = "DESeq2",
      verbose = F)
  # Copy the gene symbol from the row names into a column for filtering/plotting.
  de_markers$gene <- rownames(de_markers)

In [None]:
  ggplot(de_markers, aes(avg_log2FC, -log10(p_val))) + geom_point(size = 0.5, alpha = 0.5) + theme_bw() +
      ylab("-log10(unadjusted p-value)") + geom_text_repel(aes(label = ifelse(p_val_adj < 0.01, gene,
      "")), colour = "red", size = 3)

In [None]:
de_markers  %>% 
dplyr::filter(!grepl(gene, pattern = "^MT"))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\."))  %>% 
  dplyr::filter(!grepl(gene, pattern = "\\-"))  %>% 
mutate(label = ifelse(gene %in% c("IL7R",
"LEF1",
"TCF7",
"CCR7",
"SELL",
"BACH2",
"NELL2",
"PRF1",
                                  "ZFP36L2",
                                  "CXCR4",
"NKG7",
"GZMB",
"CST7",
"GNLY",
"CX3CR1",
"CCL5",
"TNF",
"KLRG1",
"TBX21"), gene, NA_character_))  %>% 
ggplot(aes(avg_log2FC, -log10(p_val))) + geom_point(size = 0.5, alpha = 0.5) + theme_bw() +
      ylab("-log10(unadjusted p-value)") + geom_text_repel(aes(label = ifelse(p_val < 0.5, label,
      "")), colour = "red", size = 3)

In [None]:
# Build the ranked gene list for GSEA from the NK/double-negative pseudobulk
# DE result. Genes whose symbol matches any of the patterns below are dropped;
# the rest are ordered by decreasing log2 fold change.
fc.df.deseq <- de_markers %>%
  dplyr::filter(
    !grepl("^MT", gene),
    !grepl("\\.", gene),
    !grepl("LINC", gene),
    !grepl("HNRNP", gene),
    !grepl("^MIR", gene),
    !grepl("orf", gene),
    !grepl("^RP[LS]", gene),
    !grepl("\\-", gene)
  ) %>%
  arrange(desc(avg_log2FC)) %>%
  dplyr::select(gene, avg_log2FC)

# Named numeric vector: names are gene symbols, values are log2 fold changes.
ranks_nk_t0 <- deframe(fc.df.deseq)
ranks_nk_t0

In [None]:
library(fgsea)
library(msigdbr)

In [None]:
fgseaRes <- fgsea(pathways = fgsea_sets_hs_c7, 
                 stats = ranks_nk_t0)

In [None]:
fgseaRes  %>% arrange(pval)

In [None]:
fgseaRes  %>% dplyr::filter(grepl(pathway, pattern = "NAIVE"))

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_UP"]],
               ranks_nk_t0) + labs(title="GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_UP") +
ylim(c(-0.9,0.4))
#ggsave(filename = paste0("../figures/gsea/GSE11057_NAIVE_VS_EFF_MEMORY_nk_TCELL_DN.svg"), 
#       width = 9.5, height = 8, units = "cm")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN"]],
               ranks_nk_t0) + labs(title="GOLDRATH_NAIVE_VS_EFF_CD8_TCELL_DN") +
ylim(c(-0.9,0.4))
#ggsave(filename = paste0("../figures/gsea/GSE11057_NAIVE_VS_EFF_MEMORY_nk_TCELL_DN.svg"), 
#       width = 9.5, height = 8, units = "cm")

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["GSE9650_NAIVE_VS_EFF_CD8_TCELL_DN"]],
               ranks_nk_t0) + labs(title="GSE9650_NAIVE_VS_EFF_CD8_TCELL_DN") +
ylim(c(-0.9,0.4))

In [None]:
options(repr.plot.width = 5, repr.plot.height = 3)
plotEnrichment(fgsea_sets_hs_c7[["GSE9650_NAIVE_VS_EFF_CD8_TCELL_UP"]],
               ranks_nk_t0) + labs(title="GSE9650_NAIVE_VS_EFF_CD8_TCELL_UP") +
ylim(c(-0.9,0.4))