# Part 24: Revisions

In this document, we address various questions raised by the reviewers.

In [None]:
source("diabetes_analysis_v07.R")

# Phenotype of Naive cells

In [None]:
# L2 subclusterings and L3 naive subsets from the V05 analysis.
cd4_l2_subcluster <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L2/cd4_subcluster.rds")
cd8_l2_subcluster <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L2/cd8_l2_subcluster.rds")
cd4_naive <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L3/cd4_l3_naive.rds")
cd8_naive <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L3/cd8_l3_naive.rds")

# The full L1 objects are needed by the feature plots and the SingleR
# annotation of all CD4/CD8 cells, which come before the DEG section where
# these objects are otherwise first read. Load them here so the notebook runs
# top to bottom.
cd4_l1_full_filt <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L1/cd4_l1_full_filt.rds")
cd8_l1_full_filt <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L1/cd8_l1_full_filt.rds")


## Coexpression of naive markers in the same cells

### CD4

In [None]:
DefaultAssay(cd4_naive)  <- "RNA"

In [None]:
options(repr.plot.width = 9, repr.plot.height = 5)
DimPlot(cd4_naive, group.by = "annotations_manual")

In [None]:
options(repr.plot.width = 15, repr.plot.height = 8)
# Naive marker genes on the CD4 naive embedding. Each gene is listed once
# ("IL7R" used to appear twice in the vector).
FeaturePlot(
  cd4_naive,
  features = c("LEF1", "BACH2", "IL7R", "CXCR4", "ZFP36L2", "BTG1", "BTG2", "KLF2"),
  min.cutoff = 0,
  ncol = 4
)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
# One violin plot per marker gene, split by manual annotation.
# print() is required because the plots are built inside a function call
# instead of being top-level expressions.
invisible(lapply(
  c("LEF1", "BACH2", "IL7R", "CXCR4", "ZFP36L2", "BTG1", "BTG2", "KLF2"),
  function(marker) {
    print(
      VlnPlot(cd4_naive, group.by = "annotations_manual", features = marker, pt.size = 0) +
        NoLegend()
    )
  }
))

In [None]:
options(repr.plot.width = 10, repr.plot.height = 16)
# Build one pct_expressing_boxplot() panel per marker gene and combine the
# panels left to right with `+`, four panels per row.
Reduce(
  `+`,
  lapply(
    c("LEF1", "BACH2", "IL7R", "CXCR4", "ZFP36L2", "BTG1", "BTG2", "KLF2"),
    function(marker) {
      pct_expressing_boxplot(
        seurat_object = cd4_naive,
        group.by = "annotations_manual",
        gene = marker,
        sample.col = "Sample_ID"
      )
    }
  )
) +
  plot_layout(ncol = 4)

### CD8

In [None]:
options(repr.plot.width = 9, repr.plot.height = 5)
DimPlot(cd8_naive, group.by = "annotations_manual")

In [None]:
DefaultAssay(cd8_naive)  <- "RNA"

In [None]:
options(repr.plot.width = 15, repr.plot.height = 8)
# Naive marker genes on the CD8 naive embedding. Each gene is listed once
# ("IL7R" used to appear twice in the vector).
FeaturePlot(
  cd8_naive,
  features = c("LEF1", "BACH2", "IL7R", "CXCR4", "ZFP36L2", "BTG1", "BTG2", "KLF2"),
  min.cutoff = 0,
  ncol = 4
)

In [None]:
options(repr.plot.width = 6, repr.plot.height = 5)
# One violin plot per marker gene, split by manual annotation.
# print() is required because the plots are built inside a function call
# instead of being top-level expressions.
invisible(lapply(
  c("LEF1", "BACH2", "IL7R", "CXCR4", "ZFP36L2", "BTG1", "BTG2", "KLF2"),
  function(marker) {
    print(
      VlnPlot(cd8_naive, group.by = "annotations_manual", features = marker, pt.size = 0) +
        NoLegend()
    )
  }
))

In [None]:
# Boxplot of the per-sample fraction of cells expressing `gene`, by group.
#
# Arguments:
#   seurat_object  Seurat object; raw counts are read from
#                  seurat_object@assays$RNA@counts.
#   gene           Gene name; must match exactly one row name of the RNA assay.
#   group.by       Metadata column used for the x axis and box grouping.
#   sample.col     Metadata column giving the sample of each cell.
#
# A cell counts as "expressing" when its raw count for `gene` is > 0. The
# fraction of expressing cells is computed for every sample x group
# combination; combinations that have no cells are filled with 0 by the
# pivot_wider()/pivot_longer() round trip, so each group gets one point per
# sample.
#
# Returns a ggplot object: one box per group with one jittered point per sample.
#
# NOTE(review): the y axis is labelled "Pct expressing cells" but the plotted
# values are fractions in [0, 1], not percentages - confirm which is intended.
pct_expressing_boxplot <- function(seurat_object, gene, group.by = "annotations_l2", sample.col = "sample") {
  gene_row <- which(rownames(seurat_object@assays$RNA) == gene)
  # Fail early with a readable message instead of a cryptic data.frame() error.
  if (length(gene_row) != 1) {
    stop(
      "Gene '", gene, "' matches ", length(gene_row),
      " rows of the RNA assay (expected exactly 1).",
      call. = FALSE
    )
  }
  missing_cols <- setdiff(c(group.by, sample.col), colnames(seurat_object@meta.data))
  if (length(missing_cols) > 0) {
    stop(
      "Metadata column(s) not found: ", paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  # Large-font theme used only by this plot.
  large_text_theme <- theme(
    axis.text = element_text(size = 20),
    axis.title = element_text(size = 20),
    text = element_text(size = 20, colour = "black"),
    legend.text = element_text(size = 20),
    legend.key.size = unit(10, units = "points")
  )

  df <- data.frame(
    grouping_var = seurat_object@meta.data[[group.by]],
    value = seurat_object@assays$RNA@counts[gene_row, ],
    sample = seurat_object@meta.data[[sample.col]]
  ) %>%
    mutate(expressing = if_else(value > 0, 1, 0)) %>%
    dplyr::select(-value) %>%
    group_by(sample, grouping_var) %>%
    summarise(mean_expression = mean(expressing), .groups = "drop") %>%
    # Wide-then-long round trip: adds a 0 for sample x group pairs without cells.
    pivot_wider(names_from = sample, values_from = mean_expression, values_fill = 0) %>%
    pivot_longer(!grouping_var, names_to = "sample", values_to = "expressing")

  # The former geom_dotplot(dotsize = 0) layer drew nothing and was removed.
  ggplot(data = df, aes(x = grouping_var, y = expressing)) +
    geom_boxplot(outlier.shape = NA, aes(fill = grouping_var), alpha = 0.3) +
    geom_jitter(width = 0.1, height = 0.0, size = 2, aes(color = grouping_var)) +
    theme_classic() +
    theme(plot.title = element_text(hjust = 0.5)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
    large_text_theme +
    ggtitle(gene) +
    ylab("Pct expressing cells") +
    xlab("") +
    NoLegend()
}


In [None]:
options(repr.plot.width = 10, repr.plot.height = 16)
# Build one pct_expressing_boxplot() panel per marker gene and combine the
# panels left to right with `+`, four panels per row.
Reduce(
  `+`,
  lapply(
    c("LEF1", "BACH2", "IL7R", "CXCR4", "ZFP36L2", "BTG1", "BTG2", "KLF2"),
    function(marker) {
      pct_expressing_boxplot(
        seurat_object = cd8_naive,
        group.by = "annotations_manual",
        gene = marker,
        sample.col = "Sample_ID"
      )
    }
  )
) +
  plot_layout(ncol = 4)

In [None]:
DefaultAssay(cd8_l2_subcluster) <- "RNA"
# The plot below uses cd8_l1_full_filt, so its default assay must be set too;
# previously only cd8_l2_subcluster was switched to "RNA".
DefaultAssay(cd8_l1_full_filt) <- "RNA"

options(repr.plot.width = 15, repr.plot.height = 8)
# Naive marker genes on the full CD8 dataset. Each gene is listed once
# ("IL7R" used to appear twice in the vector).
FeaturePlot(
  cd8_l1_full_filt,
  features = c("LEF1", "BACH2", "IL7R", "CXCR4", "ZFP36L2", "BTG1", "BTG2", "KLF2"),
  min.cutoff = 0,
  ncol = 4
)

In [None]:
DefaultAssay(cd8_l2_subcluster) <- "RNA"
# The plot below uses cd8_l1_full_filt, so its default assay must be set too;
# previously only cd8_l2_subcluster was switched to "RNA".
DefaultAssay(cd8_l1_full_filt) <- "RNA"

options(repr.plot.width = 15, repr.plot.height = 8)
FeaturePlot(
  cd8_l1_full_filt,
  features = c("KLRG1", "SELL", "CCR7", "CXCR3", "TCF7", "CX3CR1", "CD27", "CD28"),
  min.cutoff = 0,
  ncol = 4
)

In [None]:
options(repr.plot.width = 13, repr.plot.height = 16)
# Same marker panels as for the naive subsets, computed on the full CD8
# dataset and grouped by the L2 annotation.
Reduce(
  `+`,
  lapply(
    c("LEF1", "BACH2", "IL7R", "CXCR4", "ZFP36L2", "BTG1", "BTG2", "KLF2"),
    function(marker) {
      pct_expressing_boxplot(
        seurat_object = cd8_l1_full_filt,
        group.by = "annotations_l2",
        gene = marker,
        sample.col = "Sample_ID"
      )
    }
  )
) +
  plot_layout(ncol = 4)

## Wherry annotations

In [None]:
load("../../VN_misc/ref_wherry_new.RData")

In [None]:
ref_wherry_new

In [None]:
plan("multisession")

In [None]:
cd8_naive_matrix  <- cd8_naive@assays$RNA@counts

cd8_full_matrix  <- cd8_l1_full_filt@assays$RNA@counts


### Annotation of CD8 Naive

In [None]:
# Annotate the CD8 naive cells against the Wherry reference with SingleR.
pred.singler2 <- SingleR(
  test = cd8_naive_matrix,
  ref = ref_wherry_new$matrix,
  labels = ref_wherry_new$labels,
  de.method = "wilcox",
  fine.tune = FALSE,
  num.threads = 4
)


In [None]:
pred.singler2$scores

In [None]:
# Score matrix (one row per cell, one column per reference label), with the
# cell barcodes of the RNA assay as row names.
mtx <- pred.singler2$scores
rownames(mtx) <- colnames(cd8_naive[["RNA"]])

In [None]:
mtx

In [None]:
cd8_naive$singler  <- pred.singler2$labels

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4.5)
DimPlot(cd8_naive, group.by = "singler", shuffle = T)

In [None]:
test  <- data.frame(annotation = cd8_naive$annotations_manual,
                    pred = cd8_naive$singler, 
                    score_CD8_Naive = mtx[,"CD8_Naive"],
                    score_CD8_CM = mtx[,"CD8_CM"],
                    score_CD8_SCM_R3neg = mtx[,"CD8_SCM-R3-"],
                    score_CD8_SCM_R3pos = mtx[,"CD8_SCM-R3+"]
                     )

In [None]:
test

In [None]:
# Number of cells per (manual annotation, SingleR label) pair and the
# fraction that pair represents within its annotation.
df2 <- test %>%
  count(annotation, pred) %>%
  group_by(annotation) %>%
  mutate(freq = n / sum(n))

In [None]:
df2

In [None]:
options(repr.plot.width = 16, repr.plot.height = 4.5)
ggplot(test) +
  aes(x = annotation, fill = factor(pred)) +
  geom_bar(position = "fill") + 
theme_classic() + 
ggtheme() +
coord_flip() 
#scale_fill_manual(values = c("#d6ebd2ff", "lightskyblue1", "red2"))
#scale_fill_manual(values = c("grey","#74bc68ff", "dodgerblue3", "red2"))

### Annotation of CD8 All

In [None]:
# Annotate all CD8 cells against the Wherry reference with SingleR.
pred.singler2 <- SingleR(
  test = cd8_full_matrix,
  ref = ref_wherry_new$matrix,
  labels = ref_wherry_new$labels,
  de.method = "wilcox",
  fine.tune = FALSE,
  num.threads = 4
)


In [None]:
pred.singler2$scores

In [None]:
# Score matrix (one row per cell, one column per reference label), with the
# cell barcodes of the RNA assay as row names.
mtx <- pred.singler2$scores
rownames(mtx) <- colnames(cd8_l1_full_filt[["RNA"]])

In [None]:
mtx

In [None]:
cd8_l1_full_filt$singler  <- pred.singler2$labels

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4.5)
DimPlot(cd8_l1_full_filt, group.by = "singler", shuffle = T)

In [None]:
test  <- data.frame(annotation = cd8_l1_full_filt$annotations_l2,
                    pred = cd8_l1_full_filt$singler, 
                    score_CD8_Naive = mtx[,"CD8_Naive"],
                    score_CD8_CM = mtx[,"CD8_CM"],
                    score_CD8_SCM_R3neg = mtx[,"CD8_SCM-R3-"],
                    score_CD8_SCM_R3pos = mtx[,"CD8_SCM-R3+"]
                     )

In [None]:
test

In [None]:
# Number of cells per (L2 annotation, SingleR label) pair and the fraction
# that pair represents within its annotation.
df2 <- test %>%
  count(annotation, pred) %>%
  group_by(annotation) %>%
  mutate(freq = n / sum(n))

In [None]:
df2

In [None]:
options(repr.plot.width = 16, repr.plot.height = 4.5)
ggplot(test) +
  aes(x = annotation, fill = factor(pred)) +
  geom_bar(position = "fill") + 
theme_classic() + 
ggtheme() +
coord_flip() 
#scale_fill_manual(values = c("#d6ebd2ff", "lightskyblue1", "red2"))
#scale_fill_manual(values = c("grey","#74bc68ff", "dodgerblue3", "red2"))

### Annotation of CD4 Naive

In [None]:
cd4_naive_matrix  <- cd4_naive@assays$RNA@counts
cd4_full_matrix <- cd4_l1_full_filt@assays$RNA@counts

In [None]:
# Annotate the CD4 naive cells against the Wherry reference with SingleR.
pred.singler2 <- SingleR(
  test = cd4_naive_matrix,
  ref = ref_wherry_new$matrix,
  labels = ref_wherry_new$labels,
  de.method = "wilcox",
  fine.tune = FALSE,
  num.threads = 4
)


In [None]:
pred.singler2$scores

In [None]:
# Score matrix (one row per cell, one column per reference label), with the
# cell barcodes of the RNA assay as row names.
mtx <- pred.singler2$scores
rownames(mtx) <- colnames(cd4_naive[["RNA"]])

In [None]:
mtx

In [None]:
cd4_naive$singler  <- pred.singler2$labels

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4.5)
DimPlot(cd4_naive, group.by = "singler", shuffle = T)

In [None]:
test  <- data.frame(annotation = cd4_naive$annotations_manual,
                    pred = cd4_naive$singler, 
                    score_cd4_BulkNaive = mtx[,"CD4_BulkNaive"],
                    score_cd8_Naive = mtx[,"CD8_Naive"],
                    score_cd8_CM = mtx[,"CD8_CM"],
                    score_cd8_SCM_R3neg = mtx[,"CD8_SCM-R3-"],
                    score_cd8_SCM_R3pos = mtx[,"CD8_SCM-R3+"]
                     )

In [None]:
test

In [None]:
# Number of cells per (manual annotation, SingleR label) pair and the
# fraction that pair represents within its annotation.
df2 <- test %>%
  count(annotation, pred) %>%
  group_by(annotation) %>%
  mutate(freq = n / sum(n))

In [None]:
df2

In [None]:
options(repr.plot.width = 16, repr.plot.height = 4.5)
ggplot(test) +
  aes(x = annotation, fill = factor(pred)) +
  geom_bar(position = "fill") + 
theme_classic() + 
ggtheme() +
coord_flip() 
#scale_fill_manual(values = c("#d6ebd2ff", "lightskyblue1", "red2"))
#scale_fill_manual(values = c("grey","#74bc68ff", "dodgerblue3", "red2"))

### Annotation of CD4 All

In [None]:
# Annotate all CD4 cells against the Wherry reference with SingleR.
pred.singler2 <- SingleR(
  test = cd4_full_matrix,
  ref = ref_wherry_new$matrix,
  labels = ref_wherry_new$labels,
  de.method = "wilcox",
  fine.tune = FALSE,
  num.threads = 4
)


In [None]:
pred.singler2$scores

In [None]:
# Score matrix (one row per cell, one column per reference label), with the
# cell barcodes of the RNA assay as row names.
mtx <- pred.singler2$scores
rownames(mtx) <- colnames(cd4_l1_full_filt[["RNA"]])

In [None]:
mtx

In [None]:
cd4_l1_full_filt$singler  <- pred.singler2$labels

In [None]:
options(repr.plot.width = 6, repr.plot.height = 4.5)
DimPlot(cd4_l1_full_filt, group.by = "singler", shuffle = T)

In [None]:
test  <- data.frame(annotation = cd4_l1_full_filt$annotations_l2,
                    pred = cd4_l1_full_filt$singler, 
                    score_cd4_BulkNaive = mtx[,"CD4_BulkNaive"],
                    score_cd8_Naive = mtx[,"CD8_Naive"],
                    score_cd8_CM = mtx[,"CD8_CM"],
                    score_cd8_SCM_R3neg = mtx[,"CD8_SCM-R3-"],
                    score_cd8_SCM_R3pos = mtx[,"CD8_SCM-R3+"]
                     )

In [None]:
test

In [None]:
# Number of cells per (L2 annotation, SingleR label) pair and the fraction
# that pair represents within its annotation.
df2 <- test %>%
  count(annotation, pred) %>%
  group_by(annotation) %>%
  mutate(freq = n / sum(n))

In [None]:
df2

In [None]:
options(repr.plot.width = 16, repr.plot.height = 4.5)
ggplot(test) +
  aes(x = annotation, fill = factor(pred)) +
  geom_bar(position = "fill") + 
theme_classic() + 
ggtheme() +
coord_flip() 
#scale_fill_manual(values = c("#d6ebd2ff", "lightskyblue1", "red2"))
#scale_fill_manual(values = c("grey","#74bc68ff", "dodgerblue3", "red2"))

### Coexpression - coexpressing vs non-coexpressing cells

### CD4

In [None]:
# Object and marker genes for the co-expression analysis.
# Each gene is listed once ("IL7R" used to appear twice), so length(genes)
# equals the number of gene columns extracted below.
seurat_object <- cd4_naive
genes <- c(
  "LEF1", "BACH2", "IL7R", "CXCR4", "ZFP36L2", "SELL",
  "BTG1", "BTG2", "KLF2", "CCR7"
)


In [None]:
# Raw counts of the selected genes, transposed to one row per cell and one
# column per gene.
rns <- which(rownames(seurat_object@assays$RNA) %in% genes)
df <- as.data.frame(t(seurat_object@assays$RNA@counts[rns, ]))

In [None]:
df

In [None]:
# Binarise a numeric vector: 1 where the value is positive, otherwise 0.
binary <- function(x) {
  if_else(x > 0, 1, 0)
}

In [None]:
# Binarise every gene column. Columns are selected by type, not by position:
# the former `1:9` left the last of the 10 unique genes as raw counts and
# would error if fewer than 9 genes were found in the assay. Selecting
# numeric columns also leaves a non-numeric `annot` column untouched if this
# cell is re-run after the annotation has been added.
df <- df %>%
  mutate(across(where(is.numeric), binary))

In [None]:
df

In [None]:
df$annot  <- seurat_object$annotations_manual


In [None]:
# NOTE(review): this cell is an unfinished copy of the plotting part of
# pct_expressing_boxplot(). It cannot run as written: `df` holds binarised
# gene columns plus `annot`, not `grouping_var` / `expressing`; no top-level
# `gene` is defined in the visible notebook; and the trailing `return(plt)` /
# `}` have no enclosing function. The fragment is kept below, disabled, until
# the intended co-expression plot is written.
#
# plt = ggplot(data = df, aes(x = grouping_var, y = expressing)) +
# geom_boxplot(outlier.shape = NA, aes(fill = grouping_var), alpha = 0.3) +
#   geom_dotplot(binaxis='y', stackdir='center', dotsize=0) +
#   geom_jitter(width = 0.1, height = 0.0, size = 2, aes(color = grouping_var)) +
# theme_classic() +
#     theme(plot.title = element_text(hjust = 0.5)) +
#     theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
# ggtheme() +
#     ggtitle(gene) +
#     ylab("Pct expressing cells") +
# xlab("") + NoLegend()
#     return(plt)
#     }

## Which cell type is responsible for the skewed naive vs effector phenotype?

In [None]:
# Fold-change tables of the V06 analysis: full paths, plus the bare file
# names derived from them.
paths_fc <- list.files("../../240617_VN_Diabetes_V06/tables/fold_change/", full.names = TRUE)
dataset_names <- basename(paths_fc)


In [None]:
paths_fc

In [None]:
# Keep only the paths that contain "Ctrl" or "DiaT0_vs_DiaT1".
paths_fc <- paths_fc[grepl("Ctrl|DiaT0_vs_DiaT1", paths_fc)]

In [None]:
# Keep only the file names that contain "Ctrl" or "DiaT0_vs_DiaT1".
dataset_names <- dataset_names[grepl("Ctrl|DiaT0_vs_DiaT1", dataset_names)]

In [None]:
dataset_names

In [None]:
paths_fc

In [None]:
fc.df  <- read_csv(paths_fc[1])  %>% as.data.frame()

In [None]:
fc.df %>%  arrange(desc(avg_log2FC)) %>% 
dplyr::filter(grepl(x = cluster, pattern = "Dia")) 

In [None]:
# Rows whose cluster matches "Dia", restricted to the selected genes and
# ordered by decreasing log2 fold change.
fc.df %>%
  dplyr::filter(
    grepl(x = cluster, pattern = "Dia"),
    gene %in% c(
      "LEF1", "BACH2", "NELL2", "TCF7", "KLF2", "CCR7", "SELL",
      "IFNG", "TBX21", "TNF"
    )
  ) %>%
  arrange(desc(avg_log2FC))

In [None]:
ls()

# DEG Pseudobulk + DESeq2

In [None]:
cd4_l1_full_filt  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L1/cd4_l1_full_filt.rds")
cd8_l1_full_filt  <- readRDS("../../240218_VN_Diabetes_V05/data/processed/L1/cd8_l1_full_filt.rds")

For the DEG analysis, we will only use samples from the final experiments (Exp16, Exp18, Exp19 and Exp20; see the subsetting below).

In [None]:
cd4_l1_full_filt_sub  <- subset(cd4_l1_full_filt, Experiment_ID %in% c("Exp16","Exp18","Exp19","Exp20"))

In [None]:
cd8_l1_full_filt_sub  <- subset(cd8_l1_full_filt, Experiment_ID %in% c("Exp16","Exp18","Exp19","Exp20"))

In [None]:
cd4_l1_full_filt$annotations_l2  %>% table

In [None]:
cd8_l1_full_filt$annotations_l2  %>% table

In [None]:
cd8_l1_full_filt_sub$annotations_l2  %>% table

To be consistent with the Wilcoxon Fold Change analysis, we will group together MAIT and Tgd cells. 

In [None]:
# Collapse every L2 annotation containing "Unconventional" into "CD8 Unc".
# as.character() guards against annotations_l2 being a factor: ifelse() would
# then return the integer level codes instead of the labels for all other cells.
cd8_l1_full_filt_sub$annotations_l2 <- ifelse(
  grepl(cd8_l1_full_filt_sub$annotations_l2, pattern = "Unconventional"),
  "CD8 Unc",
  as.character(cd8_l1_full_filt_sub$annotations_l2)
)

In [None]:
cd8_l1_full_filt_sub$annotations_l2  %>% table

The DEG analysis will be performed on the RNA assay. We will aggregate the counts into pseudobulk profiles per sample and then use DESeq2 to calculate differentially expressed genes and fold changes.

## CD8 T cells

In [None]:
genes  <- rownames(cd8_l1_full_filt_sub@assays$RNA)

In [None]:
# Drop genes whose name matches any exclusion pattern. The patterns are joined
# into one alternation; "^MT" was listed twice and is kept once.
# NOTE(review): "^MT" removes every gene starting with "MT", not only "MT-"
# genes, and "\\-" removes all hyphenated gene names - confirm this is intended.
genes_filt <- genes[!grepl(
  paste(
    c("^MT", "\\.", "LINC", "^MIR", "HNRNP", "^RP[LS]", "\\-", "TTTY", "ORF", "orf"),
    collapse = "|"
  ),
  genes
)]

In [None]:
# Pseudobulk: aggregate the RNA assay per sample over the filtered genes and
# return the result as a Seurat object.
aggexp_cd8 <- AggregateExpression(
  cd8_l1_full_filt_sub,
  assay = "RNA",
  features = genes_filt,
  group.by = c("Sample_ID"),
  return.seurat = TRUE
)

### Metadata consolidation

We need to add the partial-remission and ketoacidosis metadata to the pseudobulked object.

In [None]:
# Per-patient partial-remission label ("PR_<value>"), taken from the "Dia T0"
# cells. The result stays grouped by Patient_ID, one row per
# (Patient_ID, part_remission_y_n) combination.
md_for_remission <- cd8_l1_full_filt_sub@meta.data %>%
  dplyr::filter(Condition %in% c("Dia T0")) %>%
  group_by(Patient_ID, part_remission_y_n) %>%
  summarise(.groups = "drop_last") %>%
  mutate(Condition2 = paste0("PR_", part_remission_y_n)) %>%
  dplyr::select(-part_remission_y_n)

In [None]:
md_for_remission

In [None]:
cd8_l1_full_filt_sub$Condition2  %>% table

In [None]:
# Remove any existing Condition2 so that Patient_ID is the only column shared
# with md_for_remission, then attach the remission label and append the time
# point ("PR_x T0" / "PR_x T1").
cd8_l1_full_filt_sub$Condition2 <- NULL
cd8_l1_full_filt_sub@meta.data <- cd8_l1_full_filt_sub@meta.data %>%
  left_join(md_for_remission, by = "Patient_ID") %>%
  mutate(
    Condition2 = paste(Condition2, Time),
    # paste() turns a missing value into the text "NA"; map those back to NA.
    Condition2 = ifelse(grepl(Condition2, pattern = "NA"), NA_character_, Condition2)
  )

print(cd8_l1_full_filt_sub$Condition2 %>% table)
# The join drops the row names; restore the cell barcodes.
rownames(cd8_l1_full_filt_sub@meta.data) <- colnames(cd8_l1_full_filt_sub)

In [None]:
# Per-patient ketoacidosis flag from the "Dia T0" cells: "Keto_1" when
# ph_man < 7.3, otherwise "Keto_0". The result stays grouped by Patient_ID.
md_for_ketoacidosis <- cd8_l1_full_filt_sub@meta.data %>%
  dplyr::filter(Condition %in% c("Dia T0")) %>%
  group_by(Patient_ID, ph_man) %>%
  summarise(.groups = "drop_last") %>%
  mutate(Ketoacidosis = ifelse(ph_man < 7.3, "Keto_1", "Keto_0")) %>%
  dplyr::select(-ph_man)

In [None]:
md_for_ketoacidosis

In [None]:
 # Attach the ketoacidosis flag and build the combined "<flag> <Time>" label.
cd8_l1_full_filt_sub@meta.data <- cd8_l1_full_filt_sub@meta.data %>%
  left_join(md_for_ketoacidosis) %>%
  mutate(
    Keto_Time = paste(Ketoacidosis, Time),
    # paste() turns a missing value into the text "NA"; map those back to NA.
    Keto_Time = ifelse(grepl(Keto_Time, pattern = "NA"), NA_character_, Keto_Time)
  )

print(cd8_l1_full_filt_sub$Ketoacidosis %>% table)
# The join drops the row names; restore the cell barcodes.
rownames(cd8_l1_full_filt_sub@meta.data) <- colnames(cd8_l1_full_filt_sub)

In [None]:
Conditions  <- list(c("Dia T0", "Ctrl T0"),
                    c("Dia T0", "Dia T1"),
                    c("Dia T1", "Ctrl T0"),
                    c("PR_0 T0", "PR_1 T0"),
                    c("PR_0 T1", "PR_1 T1"),
                    c("PR_0 T0", "PR_0 T1"),
                    c("PR_1 T0", "PR_1 T1"),
                    c("Keto_1 T0", "Keto_0 T0"))

In [None]:
# One row per distinct combination of the sample-level metadata columns.
md_cd8 <- cd8_l1_full_filt_sub@meta.data %>%
  group_by(Sample_ID, Condition, Condition2, Age, Sex, Ketoacidosis, Keto_Time) %>%
  summarise(.groups = "drop") %>%
  mutate(Sample_ID = as.factor(Sample_ID))

In [None]:
md_cd8$Sample_ID  <- paste0("g",md_cd8$Sample_ID)

In [None]:

md_cd8  <- left_join(aggexp_cd8@meta.data, md_cd8)
rownames(md_cd8)  <- colnames(aggexp_cd8)

### CD8 L1 dataset

In [None]:
aggexp_cd8@meta.data  <- md_cd8

In [None]:
# Pseudobulk DESeq2 markers for one pairwise comparison on the whole CD8
# dataset.
#
# Reads two globals: `Conditions` (list of condition pairs) and `aggexp_cd8`
# (pseudobulk Seurat object with Condition, Condition2 and Keto_Time metadata).
#
# j: index of the comparison in `Conditions`.
#
# Returns the FindAllMarkers() data frame with the added columns `source`,
# `test_type` and `dataset`, or NULL when no sample matches the comparison or
# no marker rows are returned (bind_rows() skips NULL elements).
Create_Markers_DataFrame <- function(j) {
  print("#######################")
  print(j)

  Condition_1 <- Conditions[[j]][1]
  Condition_2 <- Conditions[[j]][2]

  seurat_obj <- aggexp_cd8

  # Remission ("PR") and ketoacidosis ("Keto") comparisons use their own
  # metadata columns; copy the relevant one over `Condition` so the rest of
  # the function is the same for every comparison.
  if (grepl(Condition_1, pattern = "PR")) {
    seurat_obj$Condition <- seurat_obj$Condition2
  }
  if (grepl(Condition_1, pattern = "Keto")) {
    seurat_obj$Condition <- seurat_obj$Keto_Time
  }

  ## Keep only the pseudobulk samples of the two compared conditions
  keep <- colnames(seurat_obj)[seurat_obj$Condition %in% c(Condition_1, Condition_2)]
  if (length(keep) == 0) {
    warning(
      "No samples found for ", Condition_1, " vs ", Condition_2, "; skipping.",
      call. = FALSE
    )
    return(NULL)
  }
  ds <- subset(seurat_obj, cells = keep)
  print(ds$Condition %>% table)

  ## Pseudobulk DE on the RNA assay
  DefaultAssay(ds) <- "RNA"
  Idents(ds) <- ds$Condition
  markers_sc <- FindAllMarkers(ds, only.pos = TRUE, test.use = "DESeq2", return.thresh = 1)
  print(paste("DE RNA: ", nrow(markers_sc)))

  # Adding scalar columns to a 0-row data frame is an error, and adding them
  # to NULL creates a list that bind_rows() would turn into an all-NA row, so
  # an empty result is returned as NULL.
  if (is.null(markers_sc) || nrow(markers_sc) == 0) {
    return(NULL)
  }

  markers_sc$source <- "scRNAseq_RNA"
  rownames(markers_sc) <- NULL
  markers_sc$test_type <- paste(Condition_1, "vs", Condition_2)
  print("..")
  markers_sc$dataset <- "cd8_l1_full_filt"
  markers_sc
}


In [None]:
# Run every comparison defined in `Conditions` (instead of a hard-coded 1:8)
# and stack the per-comparison results into one data frame.
mrk <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)

mrk <- bind_rows(mrk)

In [None]:
mrk  %>% dplyr::filter(p_val_adj<0.05)

In [None]:
all_markers  <- mrk

### CD8 L2 datasets

In [None]:
l2_datasets  <- levels(factor(cd8_l1_full_filt_sub$annotations_l2))

In [None]:
l2_datasets

In [None]:
# Pseudobulk per sample and L2 annotation over the filtered genes, returned
# as a Seurat object.
aggexp_cd8 <- AggregateExpression(
  cd8_l1_full_filt_sub,
  assay = "RNA",
  features = genes_filt,
  group.by = c("Sample_ID", "annotations_l2"),
  return.seurat = TRUE
)

In [None]:
aggexp_cd8$Sample_ID  %>% table

In [None]:
# One row per distinct combination of the sample-level metadata columns.
md_cd8 <- cd8_l1_full_filt_sub@meta.data %>%
  group_by(Sample_ID, Condition, Condition2, Age, Sex, Ketoacidosis, Keto_Time) %>%
  summarise(.groups = "drop") %>%
  mutate(Sample_ID = as.factor(Sample_ID))

In [None]:
md_cd8$Sample_ID  <- paste0("g",md_cd8$Sample_ID)

In [None]:
md_cd8  <- left_join(aggexp_cd8@meta.data, md_cd8)
rownames(md_cd8)  <- colnames(aggexp_cd8)

In [None]:
aggexp_cd8@meta.data  <- md_cd8

In [None]:
aggexp_cd8@meta.data

In [None]:
# Pseudobulk DESeq2 markers for one pairwise comparison within one L2 cluster
# of the CD8 dataset.
#
# Reads two globals: `Conditions` (list of condition pairs) and `aggexp_cd8`
# (pseudobulk Seurat object aggregated per sample and L2 annotation).
#
# j:             index of the comparison in `Conditions`.
# cluster_index: position of the cluster in
#                levels(factor(aggexp_cd8$annotations_l2)). It defaults to the
#                global loop variable `i`, which the function previously read
#                implicitly, so existing calls keep working.
#
# Returns the FindAllMarkers() data frame with the added columns `source`,
# `test_type` and `dataset` (the cluster name), or NULL when no sample matches
# the comparison or no marker rows are returned (bind_rows() skips NULL).
Create_Markers_DataFrame <- function(j, cluster_index = i) {
  print("#######################")
  print(j)

  Condition_1 <- Conditions[[j]][1]
  Condition_2 <- Conditions[[j]][2]

  cluster_name <- levels(factor(aggexp_cd8$annotations_l2))[cluster_index]
  cluster_cells <- colnames(aggexp_cd8)[aggexp_cd8$annotations_l2 %in% cluster_name]
  seurat_obj <- subset(aggexp_cd8, cells = cluster_cells)

  # Remission ("PR") and ketoacidosis ("Keto") comparisons use their own
  # metadata columns; copy the relevant one over `Condition` so the rest of
  # the function is the same for every comparison.
  if (grepl(Condition_1, pattern = "PR")) {
    seurat_obj$Condition <- seurat_obj$Condition2
  }
  if (grepl(Condition_1, pattern = "Keto")) {
    seurat_obj$Condition <- seurat_obj$Keto_Time
  }

  ## Keep only the pseudobulk samples of the two compared conditions
  keep <- colnames(seurat_obj)[seurat_obj$Condition %in% c(Condition_1, Condition_2)]
  if (length(keep) == 0) {
    warning(
      "No samples found for ", Condition_1, " vs ", Condition_2,
      " in ", cluster_name, "; skipping.",
      call. = FALSE
    )
    return(NULL)
  }
  ds <- subset(seurat_obj, cells = keep)
  print(ds$Condition %>% table)

  ## Pseudobulk DE on the RNA assay
  DefaultAssay(ds) <- "RNA"
  Idents(ds) <- ds$Condition
  markers_sc <- FindAllMarkers(ds, only.pos = TRUE, test.use = "DESeq2", return.thresh = 1)
  print(paste("DE RNA: ", nrow(markers_sc)))

  # Adding scalar columns to a 0-row data frame is an error, and adding them
  # to NULL creates a list that bind_rows() would turn into an all-NA row, so
  # an empty result is returned as NULL.
  if (is.null(markers_sc) || nrow(markers_sc) == 0) {
    return(NULL)
  }

  markers_sc$source <- "scRNAseq_RNA"
  rownames(markers_sc) <- NULL
  markers_sc$test_type <- paste(Condition_1, "vs", Condition_2)
  print("..")
  markers_sc$dataset <- cluster_name
  markers_sc
}


In [None]:
# Run every comparison within each L2 cluster and append the results to the
# markers already stored in `all_markers`.
# `i` must remain a top-level loop variable: Create_Markers_DataFrame() uses it
# to select the cluster.
# The former suppressWarnings(rm(...)) call only targeted names that this
# notebook section never creates at top level, so it was dropped.
l2_marker_list <- vector("list", length(levels(factor(aggexp_cd8$annotations_l2))))

for (i in seq_along(l2_marker_list)) {
  print("######################################################################")

  mrk <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)
  mrk <- bind_rows(mrk)

  # Collect per-cluster results in a list instead of growing a data frame
  # with rbind() on every iteration.
  l2_marker_list[[i]] <- mrk
}

all_markers <- bind_rows(all_markers, l2_marker_list)

In [None]:
all_markers  %>% dplyr::filter(p_val_adj<0.05)

In [None]:
all_markers

In [None]:
all_markers_cd8   <- all_markers

In [None]:
ls()

## CD4 T cells

In [None]:
genes  <- rownames(cd4_l1_full_filt_sub@assays$RNA)

In [None]:
# Drop genes whose name matches any exclusion pattern. The patterns are joined
# into one alternation; "^MT" was listed twice and is kept once.
# NOTE(review): "^MT" removes every gene starting with "MT", not only "MT-"
# genes, and "\\-" removes all hyphenated gene names - confirm this is intended.
genes_filt <- genes[!grepl(
  paste(
    c("^MT", "\\.", "LINC", "^MIR", "HNRNP", "^RP[LS]", "\\-", "TTTY", "ORF", "orf"),
    collapse = "|"
  ),
  genes
)]

In [None]:
# Pseudobulk: aggregate the RNA assay per sample over the filtered genes and
# return the result as a Seurat object.
aggexp_cd4 <- AggregateExpression(
  cd4_l1_full_filt_sub,
  assay = "RNA",
  features = genes_filt,
  group.by = c("Sample_ID"),
  return.seurat = TRUE
)

### Metadata consolidation

We need to add the partial-remission and ketoacidosis metadata to the pseudobulked object.

In [None]:
# Per-patient partial-remission label ("PR_<value>"), taken from the "Dia T0"
# cells. The result stays grouped by Patient_ID, one row per
# (Patient_ID, part_remission_y_n) combination.
md_for_remission <- cd4_l1_full_filt_sub@meta.data %>%
  dplyr::filter(Condition %in% c("Dia T0")) %>%
  group_by(Patient_ID, part_remission_y_n) %>%
  summarise(.groups = "drop_last") %>%
  mutate(Condition2 = paste0("PR_", part_remission_y_n)) %>%
  dplyr::select(-part_remission_y_n)

In [None]:
md_for_remission

In [None]:
cd4_l1_full_filt_sub$Condition2  %>% table

In [None]:
# Remove any existing Condition2 so that Patient_ID is the only column shared
# with md_for_remission, then attach the remission label and append the time
# point ("PR_x T0" / "PR_x T1").
cd4_l1_full_filt_sub$Condition2 <- NULL
cd4_l1_full_filt_sub@meta.data <- cd4_l1_full_filt_sub@meta.data %>%
  left_join(md_for_remission, by = "Patient_ID") %>%
  mutate(
    Condition2 = paste(Condition2, Time),
    # paste() turns a missing value into the text "NA"; map those back to NA.
    Condition2 = ifelse(grepl(Condition2, pattern = "NA"), NA_character_, Condition2)
  )

print(cd4_l1_full_filt_sub$Condition2 %>% table)
# The join drops the row names; restore the cell barcodes.
rownames(cd4_l1_full_filt_sub@meta.data) <- colnames(cd4_l1_full_filt_sub)

In [None]:
# Per-patient ketoacidosis flag from the "Dia T0" cells: "Keto_1" when
# ph_man < 7.3, otherwise "Keto_0". The result stays grouped by Patient_ID.
md_for_ketoacidosis <- cd4_l1_full_filt_sub@meta.data %>%
  dplyr::filter(Condition %in% c("Dia T0")) %>%
  group_by(Patient_ID, ph_man) %>%
  summarise(.groups = "drop_last") %>%
  mutate(Ketoacidosis = ifelse(ph_man < 7.3, "Keto_1", "Keto_0")) %>%
  dplyr::select(-ph_man)

In [None]:
md_for_ketoacidosis

In [None]:
 # Attach the ketoacidosis flag and build the combined "<flag> <Time>" label.
cd4_l1_full_filt_sub@meta.data <- cd4_l1_full_filt_sub@meta.data %>%
  left_join(md_for_ketoacidosis) %>%
  mutate(
    Keto_Time = paste(Ketoacidosis, Time),
    # paste() turns a missing value into the text "NA"; map those back to NA.
    Keto_Time = ifelse(grepl(Keto_Time, pattern = "NA"), NA_character_, Keto_Time)
  )

print(cd4_l1_full_filt_sub$Ketoacidosis %>% table)
# The join drops the row names; restore the cell barcodes.
rownames(cd4_l1_full_filt_sub@meta.data) <- colnames(cd4_l1_full_filt_sub)

In [None]:
Conditions  <- list(c("Dia T0", "Ctrl T0"),
                    c("Dia T0", "Dia T1"),
                    c("Dia T1", "Ctrl T0"),
                    c("PR_0 T0", "PR_1 T0"),
                    c("PR_0 T1", "PR_1 T1"),
                    c("PR_0 T0", "PR_0 T1"),
                    c("PR_1 T0", "PR_1 T1"),
                    c("Keto_1 T0", "Keto_0 T0"))

In [None]:
# One row per distinct combination of the sample-level metadata columns.
md_cd4 <- cd4_l1_full_filt_sub@meta.data %>%
  group_by(Sample_ID, Condition, Condition2, Age, Sex, Ketoacidosis, Keto_Time) %>%
  summarise(.groups = "drop") %>%
  mutate(Sample_ID = as.factor(Sample_ID))

In [None]:
md_cd4$Sample_ID  <- paste0("g",md_cd4$Sample_ID)

In [None]:

md_cd4  <- left_join(aggexp_cd4@meta.data, md_cd4)
rownames(md_cd4)  <- colnames(aggexp_cd4)

### CD4 L1 dataset

In [None]:
aggexp_cd4@meta.data  <- md_cd4

In [None]:
# Pseudobulk DESeq2 markers for one pairwise comparison on the whole CD4
# dataset.
#
# Reads two globals: `Conditions` (list of condition pairs) and `aggexp_cd4`
# (pseudobulk Seurat object with Condition, Condition2 and Keto_Time metadata).
#
# j: index of the comparison in `Conditions`.
#
# Returns the FindAllMarkers() data frame with the added columns `source`,
# `test_type` and `dataset`, or NULL when no sample matches the comparison or
# no marker rows are returned (bind_rows() skips NULL elements).
Create_Markers_DataFrame <- function(j) {
  print("#######################")
  print(j)

  Condition_1 <- Conditions[[j]][1]
  Condition_2 <- Conditions[[j]][2]

  seurat_obj <- aggexp_cd4

  # Remission ("PR") and ketoacidosis ("Keto") comparisons use their own
  # metadata columns; copy the relevant one over `Condition` so the rest of
  # the function is the same for every comparison.
  if (grepl(Condition_1, pattern = "PR")) {
    seurat_obj$Condition <- seurat_obj$Condition2
  }
  if (grepl(Condition_1, pattern = "Keto")) {
    seurat_obj$Condition <- seurat_obj$Keto_Time
  }

  ## Keep only the pseudobulk samples of the two compared conditions
  keep <- colnames(seurat_obj)[seurat_obj$Condition %in% c(Condition_1, Condition_2)]
  if (length(keep) == 0) {
    warning(
      "No samples found for ", Condition_1, " vs ", Condition_2, "; skipping.",
      call. = FALSE
    )
    return(NULL)
  }
  ds <- subset(seurat_obj, cells = keep)
  print(ds$Condition %>% table)

  ## Pseudobulk DE on the RNA assay
  DefaultAssay(ds) <- "RNA"
  Idents(ds) <- ds$Condition
  markers_sc <- FindAllMarkers(ds, only.pos = TRUE, test.use = "DESeq2", return.thresh = 1)
  print(paste("DE RNA: ", nrow(markers_sc)))

  # Adding scalar columns to a 0-row data frame is an error, and adding them
  # to NULL creates a list that bind_rows() would turn into an all-NA row, so
  # an empty result is returned as NULL.
  if (is.null(markers_sc) || nrow(markers_sc) == 0) {
    return(NULL)
  }

  markers_sc$source <- "scRNAseq_RNA"
  rownames(markers_sc) <- NULL
  markers_sc$test_type <- paste(Condition_1, "vs", Condition_2)
  print("..")
  markers_sc$dataset <- "cd4_l1_full_filt"
  markers_sc
}


In [None]:
# Run every comparison defined in `Conditions` (instead of a hard-coded 1:8)
# and stack the per-comparison results into one data frame.
mrk <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)

mrk <- bind_rows(mrk)

In [None]:
mrk  %>% dplyr::filter(p_val_adj<0.05)

In [None]:
all_markers  <- mrk

### CD4 L2 datasets

In [None]:
l2_datasets  <- levels(factor(cd4_l1_full_filt_sub$annotations_l2))

In [None]:
l2_datasets

In [None]:
# Pseudobulk per sample and L2 annotation over the filtered genes, returned
# as a Seurat object.
aggexp_cd4 <- AggregateExpression(
  cd4_l1_full_filt_sub,
  assay = "RNA",
  features = genes_filt,
  group.by = c("Sample_ID", "annotations_l2"),
  return.seurat = TRUE
)

In [None]:
aggexp_cd4$Sample_ID  %>% table

In [None]:
md_cd4  <- cd4_l1_full_filt_sub@meta.data  %>% 
group_by(Sample_ID, Condition, Condition2, Age, Sex, Ketoacidosis, Keto_Time)  %>% tally  %>% dplyr::select(-n)  %>% 
ungroup  %>% 
mutate(Sample_ID = as.factor(Sample_ID))

In [None]:
md_cd4$Sample_ID  <- paste0("g",md_cd4$Sample_ID)

In [None]:
md_cd4  <- left_join(aggexp_cd4@meta.data, md_cd4)
rownames(md_cd4)  <- colnames(aggexp_cd4)

In [None]:
aggexp_cd4@meta.data  <- md_cd4

In [None]:
aggexp_cd4@meta.data

In [None]:
# Pseudobulk differential expression (FindAllMarkers with DESeq2) between the
# two conditions stored in Conditions[[j]], restricted to one L2 population of
# the global `aggexp_cd4`.
#
# j: index into the global list `Conditions`; each element is
#    c(Condition_1, Condition_2).
#
# The population is chosen by the GLOBAL index `i` (set by the calling for
# loop): it is the i-th level of factor(aggexp_cd4$annotations_l2).
#
# Returns the FindAllMarkers table with added `source`, `test_type` and
# `dataset` columns, or NULL when no markers come back (bind_rows() skips
# NULL entries).

Create_Markers_DataFrame  <- function(j){
    print("#######################")
    print(j)

    Condition_1  <- Conditions[[j]][1]
    Condition_2  <- Conditions[[j]][2]

    # Resolve the population once; it is used for subsetting and labelling.
    target_population  <- levels(factor(aggexp_cd4$annotations_l2))[i]
    if(is.na(target_population)){
        stop("Global index `i` does not point to a level of aggexp_cd4$annotations_l2", call. = FALSE)
    }

    seurat_obj  <- subset(aggexp_cd4, annotations_l2 == target_population)

    # "PR" and "Keto" comparisons live in their own metadata columns; copy the
    # relevant one into `Condition` so the rest of the function is uniform.
    if(grepl(Condition_1, pattern = "PR")){
      seurat_obj$Condition  <- seurat_obj$Condition2
    }

    if(grepl(Condition_1, pattern = "Keto")){
      seurat_obj$Condition  <- seurat_obj$Keto_Time
    }

    ## Keep only the samples that belong to the two compared conditions
    ds  <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) )
    print(ds$Condition  %>% table)

    ## scRNAseq - RNA
    DefaultAssay(ds)  <- "RNA"
    Idents(ds)  <- ds$Condition
    markers_sc  <- FindAllMarkers(ds, only.pos = TRUE, test.use = "DESeq2", return.thresh = 1)

    n_markers  <- if(is.null(markers_sc)) 0L else nrow(markers_sc)
    print(paste("DE RNA: ", n_markers))

    # A 0-row result cannot take the length-1 annotation columns below
    # (`$<-` on an empty data frame is an error), so it is skipped instead.
    if(n_markers == 0){
        return(NULL)
    }

    markers_sc$source  <- "scRNAseq_RNA"
    rownames(markers_sc)  <- NULL
    markers_sc$test_type  <- paste(Condition_1, "vs", Condition_2)
    print("..")
    markers_sc$dataset  <- target_population
    return(markers_sc)
}


In [None]:
# Run all condition comparisons inside every L2 population of `aggexp_cd4` and
# append the results to `all_markers`.
# `i` is deliberately a global loop variable: Create_Markers_DataFrame() reads
# it to pick the population. seq_along() keeps the loop from running when
# there are no populations (1:length() would iterate over c(1, 0)).
for(i in seq_along(levels(factor(aggexp_cd4$annotations_l2)))){

    print("######################################################################")
    if(i > 1){
        # Removes these names from the global environment if they exist there.
        suppressWarnings(rm(markers_sc, markers, markers1, markers_sc2, markers_sc_predia, markers_bulk, markers_bulk2))
    }

    mrk  <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)

    mrk  <- bind_rows(mrk)

    # bind_rows() tolerates an empty `mrk` and differing column sets
    all_markers  <- bind_rows(all_markers, mrk)

}

In [None]:
all_markers_cd4  <- all_markers

In [None]:
all_markers_cd4  %>% dplyr::filter(p_val_adj<0.05)

In [None]:
mrk  %>% dplyr::filter(p_val_adj<0.05)

# DESeq FoldChanges

## CD8

In [None]:
genes  <- rownames(cd8_l1_full_filt_sub@assays$RNA)

In [None]:
# Keep only genes whose name matches NONE of the exclusion patterns below.
# NOTE(review): "^MT" matches every symbol that begins with MT, not only the
# "MT-" prefixed ones (those are also caught by "\\-") - confirm this is wanted.
genes_filt  <- genes[!Reduce(`|`, lapply(
    c("^MT", "\\.", "LINC", "^MIR", "HNRNP", "^IG[KLH]",
      "^RP[LS]", "\\-", "TTTY", "ORF", "orf"),
    function(excl_pattern) grepl(genes, pattern = excl_pattern)
))]

In [None]:
# Pseudobulk the CD8 object per sample, restricted to the filtered gene set;
# TRUE is spelled out because `T` can be reassigned.
aggexp_cd8  <- AggregateExpression(cd8_l1_full_filt_sub, group.by = c("Sample_ID"), 
                                  features = genes_filt, return.seurat = TRUE, assay = "RNA")

In [None]:
# One row per unique combination of sample-level covariates. Sample IDs get a
# "g" prefix so they can be matched against the Sample_ID column of the
# pseudobulk object (presumably prefixed by AggregateExpression - confirm).
md_cd8  <- cd8_l1_full_filt_sub@meta.data  %>% 
    dplyr::count(Sample_ID, Condition, Condition2, Age, Sex, Ketoacidosis, Keto_Time)  %>% 
    dplyr::select(-n)  %>% 
    mutate(Sample_ID = paste0("g", Sample_ID))

# Attach the covariates to the pseudobulk metadata; the join drops row names,
# so they are restored from the pseudobulk column names.
md_cd8  <- left_join(aggexp_cd8@meta.data, md_cd8)
rownames(md_cd8)  <- colnames(aggexp_cd8)

In [None]:
aggexp_cd8@meta.data  <- md_cd8

In [None]:
# Fold changes between the two conditions stored in Conditions[[j]], computed
# with Seurat's FoldChange() on the global pseudobulk object `aggexp_cd8`.
#
# j: index into the global list `Conditions`; each element is
#    c(Condition_1, Condition_2). Condition_1 is passed as ident.1 and
#    Condition_2 as ident.2.
#
# Returns the FoldChange() table with the gene names moved from the row names
# into a `gene` column, plus `source`, `test_type` and `dataset` columns, or
# NULL when the table is empty (bind_rows() skips NULL entries).

Create_Markers_DataFrame  <- function(j){
    print("#######################")
    print(j)

    Condition_1  <- Conditions[[j]][1]
    Condition_2  <- Conditions[[j]][2]

    seurat_obj  <- aggexp_cd8

    # "PR" and "Keto" comparisons live in their own metadata columns; copy the
    # relevant one into `Condition` so the rest of the function is uniform.
    if(grepl(Condition_1, pattern = "PR")){
      seurat_obj$Condition  <- seurat_obj$Condition2
    }

    if(grepl(Condition_1, pattern = "Keto")){
      seurat_obj$Condition  <- seurat_obj$Keto_Time
    }

    ## Keep only the samples that belong to the two compared conditions
    ds  <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) )
    print(ds$Condition  %>% table)

    ## scRNAseq - RNA
    DefaultAssay(ds)  <- "RNA"
    Idents(ds)  <- ds$Condition
    # NOTE(review): `test.use` is not among FoldChange()'s documented arguments,
    # so it is presumably absorbed by `...` without selecting DESeq2 - confirm.
    fold_changes  <- FoldChange(ds, ident.1 = Condition_1, ident.2 = Condition_2, test.use = "DESeq2")

    # Nothing to annotate: length-1 columns cannot be added to a 0-row table.
    if(is.null(fold_changes) || nrow(fold_changes) == 0){
        return(NULL)
    }

    fold_changes$gene  <- rownames(fold_changes)
    fold_changes$source  <- "scRNAseq_RNA"
    rownames(fold_changes)  <- NULL
    fold_changes$test_type  <- paste(Condition_1, "vs", Condition_2)
    print("..")
    fold_changes$dataset  <- "cd8_l1_full_filt"
    return(fold_changes)
}


In [None]:
# Run every comparison listed in `Conditions` (instead of a hard-coded 1:8)
# and stack the per-comparison tables into one data frame.
mrk  <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)

mrk  <- bind_rows(mrk)

In [None]:
mrk  %>% arrange(avg_log2FC)

In [None]:
all_fcs_cd8  <- mrk

In [None]:
### CD8 L2 datasets

In [None]:
l2_datasets  <- levels(factor(cd8_l1_full_filt_sub$annotations_l2))

In [None]:
l2_datasets

In [None]:
# Pseudobulk the CD8 object per sample and L2 annotation, restricted to the
# filtered gene set; TRUE is spelled out because `T` can be reassigned.
aggexp_cd8  <- AggregateExpression(cd8_l1_full_filt_sub, group.by = c("Sample_ID", "annotations_l2"), 
                                  features = genes_filt, return.seurat = TRUE, assay = "RNA")

In [None]:
md_cd8  <- cd8_l1_full_filt_sub@meta.data  %>% 
group_by(Sample_ID, Condition, Condition2, Age, Sex, Ketoacidosis, Keto_Time)  %>% tally  %>% dplyr::select(-n)  %>% 
ungroup  %>% 
mutate(Sample_ID = as.factor(Sample_ID))

In [None]:
md_cd8$Sample_ID  <- paste0("g",md_cd8$Sample_ID)

In [None]:
md_cd8  <- left_join(aggexp_cd8@meta.data, md_cd8)
rownames(md_cd8)  <- colnames(aggexp_cd8)

In [None]:
aggexp_cd8@meta.data  <- md_cd8

In [None]:
aggexp_cd8@meta.data

In [None]:
# Fold changes between the two conditions stored in Conditions[[j]], computed
# with Seurat's FoldChange() on one L2 population of the global `aggexp_cd8`.
#
# j: index into the global list `Conditions`; each element is
#    c(Condition_1, Condition_2). Condition_1 is passed as ident.1 and
#    Condition_2 as ident.2.
#
# The population is chosen by the GLOBAL index `i` (set by the calling for
# loop): it is the i-th level of factor(aggexp_cd8$annotations_l2).
#
# Returns the FoldChange() table with the gene names moved from the row names
# into a `gene` column, plus `source`, `test_type` and `dataset` columns, or
# NULL when the table is empty (bind_rows() skips NULL entries).

Create_Markers_DataFrame  <- function(j){
    print("#######################")
    print(j)

    Condition_1  <- Conditions[[j]][1]
    Condition_2  <- Conditions[[j]][2]

    # Resolve the population once; it is used for subsetting and labelling.
    target_population  <- levels(factor(aggexp_cd8$annotations_l2))[i]
    if(is.na(target_population)){
        stop("Global index `i` does not point to a level of aggexp_cd8$annotations_l2", call. = FALSE)
    }

    seurat_obj  <- subset(aggexp_cd8, annotations_l2 == target_population)

    # "PR" and "Keto" comparisons live in their own metadata columns; copy the
    # relevant one into `Condition` so the rest of the function is uniform.
    if(grepl(Condition_1, pattern = "PR")){
      seurat_obj$Condition  <- seurat_obj$Condition2
    }

    if(grepl(Condition_1, pattern = "Keto")){
      seurat_obj$Condition  <- seurat_obj$Keto_Time
    }

    ## Keep only the samples that belong to the two compared conditions
    ds  <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) )
    print(ds$Condition  %>% table)

    ## scRNAseq - RNA
    DefaultAssay(ds)  <- "RNA"
    Idents(ds)  <- ds$Condition
    # NOTE(review): `test.use` is not among FoldChange()'s documented arguments,
    # so it is presumably absorbed by `...` without selecting DESeq2 - confirm.
    fold_changes  <- FoldChange(ds, ident.1 = Condition_1, ident.2 = Condition_2, test.use = "DESeq2")

    # Nothing to annotate: length-1 columns cannot be added to a 0-row table.
    if(is.null(fold_changes) || nrow(fold_changes) == 0){
        return(NULL)
    }

    fold_changes$gene  <- rownames(fold_changes)
    fold_changes$source  <- "scRNAseq_RNA"
    rownames(fold_changes)  <- NULL
    fold_changes$test_type  <- paste(Condition_1, "vs", Condition_2)
    print("..")
    fold_changes$dataset  <- target_population
    return(fold_changes)
}


In [None]:
# Compute fold changes for all condition comparisons inside every L2
# population of `aggexp_cd8` and append them to `all_fcs_cd8`.
# `i` is deliberately a global loop variable: Create_Markers_DataFrame() reads
# it to pick the population. seq_along() keeps the loop from running when
# there are no populations (1:length() would iterate over c(1, 0)).
for(i in seq_along(levels(factor(aggexp_cd8$annotations_l2)))){

    print("######################################################################")
    if(i > 1){
        # Removes these names from the global environment if they exist there.
        suppressWarnings(rm(markers_sc, markers, markers1, markers_sc2, markers_sc_predia, markers_bulk, markers_bulk2))
    }

    mrk  <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)

    mrk  <- bind_rows(mrk)

    # bind_rows() tolerates an empty `mrk` and differing column sets
    all_fcs_cd8  <- bind_rows(all_fcs_cd8, mrk)

}

In [None]:
all_fcs_cd8  %>% arrange(avg_log2FC)

## CD4

In [None]:
genes  <- rownames(cd4_l1_full_filt_sub@assays$RNA)

In [None]:
# Keep only genes whose name matches NONE of the exclusion patterns below.
# NOTE(review): "^MT" matches every symbol that begins with MT, not only the
# "MT-" prefixed ones (those are also caught by "\\-") - confirm this is wanted.
genes_filt  <- genes[!Reduce(`|`, lapply(
    c("^MT", "\\.", "LINC", "^MIR", "HNRNP", "^IG[KLH]",
      "^RP[LS]", "\\-", "TTTY", "ORF", "orf"),
    function(excl_pattern) grepl(genes, pattern = excl_pattern)
))]

In [None]:
# Pseudobulk the CD4 object per sample, restricted to the filtered gene set;
# TRUE is spelled out because `T` can be reassigned.
aggexp_cd4  <- AggregateExpression(cd4_l1_full_filt_sub, group.by = c("Sample_ID"), 
                                  features = genes_filt, return.seurat = TRUE, assay = "RNA")

In [None]:
# One row per unique combination of sample-level covariates. Sample IDs get a
# "g" prefix so they can be matched against the Sample_ID column of the
# pseudobulk object (presumably prefixed by AggregateExpression - confirm).
md_cd4  <- cd4_l1_full_filt_sub@meta.data  %>% 
    dplyr::count(Sample_ID, Condition, Condition2, Age, Sex, Ketoacidosis, Keto_Time)  %>% 
    dplyr::select(-n)  %>% 
    mutate(Sample_ID = paste0("g", Sample_ID))

# Attach the covariates to the pseudobulk metadata; the join drops row names,
# so they are restored from the pseudobulk column names.
md_cd4  <- left_join(aggexp_cd4@meta.data, md_cd4)
rownames(md_cd4)  <- colnames(aggexp_cd4)

In [None]:
aggexp_cd4@meta.data  <- md_cd4

In [None]:
# Fold changes between the two conditions stored in Conditions[[j]], computed
# with Seurat's FoldChange() on the global pseudobulk object `aggexp_cd4`.
#
# j: index into the global list `Conditions`; each element is
#    c(Condition_1, Condition_2). Condition_1 is passed as ident.1 and
#    Condition_2 as ident.2.
#
# Returns the FoldChange() table with the gene names moved from the row names
# into a `gene` column, plus `source`, `test_type` and `dataset` columns, or
# NULL when the table is empty (bind_rows() skips NULL entries).

Create_Markers_DataFrame  <- function(j){
    print("#######################")
    print(j)

    Condition_1  <- Conditions[[j]][1]
    Condition_2  <- Conditions[[j]][2]

    seurat_obj  <- aggexp_cd4

    # "PR" and "Keto" comparisons live in their own metadata columns; copy the
    # relevant one into `Condition` so the rest of the function is uniform.
    if(grepl(Condition_1, pattern = "PR")){
      seurat_obj$Condition  <- seurat_obj$Condition2
    }

    if(grepl(Condition_1, pattern = "Keto")){
      seurat_obj$Condition  <- seurat_obj$Keto_Time
    }

    ## Keep only the samples that belong to the two compared conditions
    ds  <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) )
    print(ds$Condition  %>% table)

    ## scRNAseq - RNA
    DefaultAssay(ds)  <- "RNA"
    Idents(ds)  <- ds$Condition
    # NOTE(review): `test.use` is not among FoldChange()'s documented arguments,
    # so it is presumably absorbed by `...` without selecting DESeq2 - confirm.
    fold_changes  <- FoldChange(ds, ident.1 = Condition_1, ident.2 = Condition_2, test.use = "DESeq2")

    # Nothing to annotate: length-1 columns cannot be added to a 0-row table.
    if(is.null(fold_changes) || nrow(fold_changes) == 0){
        return(NULL)
    }

    fold_changes$gene  <- rownames(fold_changes)
    fold_changes$source  <- "scRNAseq_RNA"
    rownames(fold_changes)  <- NULL
    fold_changes$test_type  <- paste(Condition_1, "vs", Condition_2)
    print("..")
    fold_changes$dataset  <- "cd4_l1_full_filt"
    return(fold_changes)
}


In [None]:
# Run every comparison listed in `Conditions` (instead of a hard-coded 1:8)
# and stack the per-comparison tables into one data frame.
mrk  <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)

mrk  <- bind_rows(mrk)

In [None]:
mrk  %>% arrange(avg_log2FC)

In [None]:
all_fcs_cd4  <- mrk

In [None]:
### cd4 L2 datasets

In [None]:
l2_datasets  <- levels(factor(cd4_l1_full_filt_sub$annotations_l2))

In [None]:
l2_datasets

In [None]:
# Pseudobulk the CD4 object per sample and L2 annotation, restricted to the
# filtered gene set; TRUE is spelled out because `T` can be reassigned.
aggexp_cd4  <- AggregateExpression(cd4_l1_full_filt_sub, group.by = c("Sample_ID", "annotations_l2"), 
                                  features = genes_filt, return.seurat = TRUE, assay = "RNA")

In [None]:
md_cd4  <- cd4_l1_full_filt_sub@meta.data  %>% 
group_by(Sample_ID, Condition, Condition2, Age, Sex, Ketoacidosis, Keto_Time)  %>% tally  %>% dplyr::select(-n)  %>% 
ungroup  %>% 
mutate(Sample_ID = as.factor(Sample_ID))

In [None]:
md_cd4$Sample_ID  <- paste0("g",md_cd4$Sample_ID)

In [None]:
md_cd4  <- left_join(aggexp_cd4@meta.data, md_cd4)
rownames(md_cd4)  <- colnames(aggexp_cd4)

In [None]:
aggexp_cd4@meta.data  <- md_cd4

In [None]:
aggexp_cd4@meta.data

In [None]:
# Fold changes between the two conditions stored in Conditions[[j]], computed
# with Seurat's FoldChange() on one L2 population of the global `aggexp_cd4`.
#
# j: index into the global list `Conditions`; each element is
#    c(Condition_1, Condition_2). Condition_1 is passed as ident.1 and
#    Condition_2 as ident.2.
#
# The population is chosen by the GLOBAL index `i` (set by the calling for
# loop): it is the i-th level of factor(aggexp_cd4$annotations_l2).
#
# Returns the FoldChange() table with the gene names moved from the row names
# into a `gene` column, plus `source`, `test_type` and `dataset` columns, or
# NULL when the table is empty (bind_rows() skips NULL entries).

Create_Markers_DataFrame  <- function(j){
    print("#######################")
    print(j)

    Condition_1  <- Conditions[[j]][1]
    Condition_2  <- Conditions[[j]][2]

    # Resolve the population once; it is used for subsetting and labelling.
    target_population  <- levels(factor(aggexp_cd4$annotations_l2))[i]
    if(is.na(target_population)){
        stop("Global index `i` does not point to a level of aggexp_cd4$annotations_l2", call. = FALSE)
    }

    seurat_obj  <- subset(aggexp_cd4, annotations_l2 == target_population)

    # "PR" and "Keto" comparisons live in their own metadata columns; copy the
    # relevant one into `Condition` so the rest of the function is uniform.
    if(grepl(Condition_1, pattern = "PR")){
      seurat_obj$Condition  <- seurat_obj$Condition2
    }

    if(grepl(Condition_1, pattern = "Keto")){
      seurat_obj$Condition  <- seurat_obj$Keto_Time
    }

    ## Keep only the samples that belong to the two compared conditions
    ds  <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) )
    print(ds$Condition  %>% table)

    ## scRNAseq - RNA
    DefaultAssay(ds)  <- "RNA"
    Idents(ds)  <- ds$Condition
    # NOTE(review): `test.use` is not among FoldChange()'s documented arguments,
    # so it is presumably absorbed by `...` without selecting DESeq2 - confirm.
    fold_changes  <- FoldChange(ds, ident.1 = Condition_1, ident.2 = Condition_2, test.use = "DESeq2")

    # Nothing to annotate: length-1 columns cannot be added to a 0-row table.
    if(is.null(fold_changes) || nrow(fold_changes) == 0){
        return(NULL)
    }

    fold_changes$gene  <- rownames(fold_changes)
    fold_changes$source  <- "scRNAseq_RNA"
    rownames(fold_changes)  <- NULL
    fold_changes$test_type  <- paste(Condition_1, "vs", Condition_2)
    print("..")
    fold_changes$dataset  <- target_population
    return(fold_changes)
}


In [None]:
# Compute fold changes for all condition comparisons inside every L2
# population of `aggexp_cd4` and append them to `all_fcs_cd4`.
# `i` is deliberately a global loop variable: Create_Markers_DataFrame() reads
# it to pick the population. seq_along() keeps the loop from running when
# there are no populations (1:length() would iterate over c(1, 0)).
for(i in seq_along(levels(factor(aggexp_cd4$annotations_l2)))){

    print("######################################################################")
    if(i > 1){
        # Removes these names from the global environment if they exist there.
        suppressWarnings(rm(markers_sc, markers, markers1, markers_sc2, markers_sc_predia, markers_bulk, markers_bulk2))
    }

    mrk  <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)

    mrk  <- bind_rows(mrk)

    # bind_rows() tolerates an empty `mrk` and differing column sets
    all_fcs_cd4  <- bind_rows(all_fcs_cd4, mrk)

}

In [None]:
all_fcs_cd4  %>% arrange(avg_log2FC)

## CD8 T cells

In [None]:
genes  <- rownames(cd8_l1_full_filt_sub@assays$RNA)

In [None]:
# Keep only genes whose name matches NONE of the exclusion patterns below.
# NOTE(review): "^MT" matches every symbol that begins with MT, not only the
# "MT-" prefixed ones (those are also caught by "\\-") - confirm this is wanted.
# NOTE(review): unlike the filters in the "DESeq FoldChanges" cells, this list
# has no "^IG[KLH]" pattern - confirm the difference is intentional.
genes_filt  <- genes[!Reduce(`|`, lapply(
    c("^MT", "\\.", "LINC", "^MIR", "HNRNP",
      "^RP[LS]", "\\-", "TTTY", "ORF", "orf"),
    function(excl_pattern) grepl(genes, pattern = excl_pattern)
))]

In [None]:
# Pseudobulk the CD8 object per sample, restricted to the filtered gene set;
# TRUE is spelled out because `T` can be reassigned.
aggexp_cd8  <- AggregateExpression(cd8_l1_full_filt_sub, group.by = c("Sample_ID"), 
                                  features = genes_filt, return.seurat = TRUE, assay = "RNA")

### Metadata consolidation

We need to add metadata with Partial response and ketoacidosis to the pseudobulked object. 

In [None]:
md_for_remission  <- cd8_l1_full_filt_sub@meta.data  %>% 
dplyr::filter(Condition %in% c("Dia T0"))  %>% 
group_by(Patient_ID, part_remission_y_n)  %>% 
tally %>% mutate(Condition2 = paste0("PR_",part_remission_y_n))  %>% 
dplyr::select(-n, -part_remission_y_n)

In [None]:
md_for_remission

In [None]:
cd8_l1_full_filt_sub$Condition2  %>% table

In [None]:
cd8_l1_full_filt_sub@meta.data

In [None]:
 colnames(cd8_l1_full_filt_sub)

In [None]:
# Attach the per-patient partial-remission label and combine it with the time
# point into Condition2 (labels of the form used in `Conditions`, e.g. "PR_1 T0").
# Drop any previous Condition2 first so re-running the cell does not clash.
cd8_l1_full_filt_sub$Condition2  <- NULL
cd8_l1_full_filt_sub@meta.data  <- local({
    md_cells  <- cd8_l1_full_filt_sub@meta.data

    md_joined  <- md_cells  %>% 
        left_join(md_for_remission)  %>% 
        mutate(Condition2 = paste(Condition2, Time))  %>% 
        # paste() turns a missing label or time into the text "NA"; map those back to NA
        mutate(Condition2 = ifelse(grepl(Condition2, pattern = "NA"), NA_character_, Condition2))

    # dplyr joins drop row names, but the meta.data row names are the cell
    # names of the Seurat object, so they have to be restored after the join.
    if(nrow(md_joined) != nrow(md_cells)){
        stop("Join with md_for_remission changed the number of cells; check for duplicated Patient_ID rows", call. = FALSE)
    }
    rownames(md_joined)  <- rownames(md_cells)
    md_joined
})
    

In [None]:
md_for_ketoacidosis  <- cd8_l1_full_filt_sub@meta.data  %>% 
dplyr::filter(Condition %in% c("Dia T0"))  %>% 
group_by(Patient_ID, ph_man)  %>% 
tally %>% mutate(Ketoacidosis = ifelse(ph_man<7.3,"Keto_1","Keto_0"))  %>% 
dplyr::select(-ph_man, -n)

In [None]:
md_for_ketoacidosis

In [None]:
 # Attach the per-patient ketoacidosis label and combine it with the time point
 # into Keto_Time (labels of the form used in `Conditions`, e.g. "Keto_1 T0").
 cd8_l1_full_filt_sub@meta.data  <- local({
    md_cells  <- cd8_l1_full_filt_sub@meta.data

    md_joined  <- md_cells  %>% 
        left_join(md_for_ketoacidosis)   %>% 
        mutate(Keto_Time = paste(Ketoacidosis, Time))  %>% 
        # paste() turns a missing label or time into the text "NA"; map those back to NA
        mutate(Keto_Time = ifelse(grepl(Keto_Time, pattern = "NA"), NA_character_, Keto_Time))

    # dplyr joins drop row names, but the meta.data row names are the cell
    # names of the Seurat object, so they have to be restored after the join.
    if(nrow(md_joined) != nrow(md_cells)){
        stop("Join with md_for_ketoacidosis changed the number of cells; check for duplicated Patient_ID rows", call. = FALSE)
    }
    rownames(md_joined)  <- rownames(md_cells)
    md_joined
 })
    
    print(cd8_l1_full_filt_sub$Ketoacidosis  %>% table)


In [None]:
Conditions  <- list(c("Dia T0", "Ctrl T0"),
                    c("Dia T0", "Dia T1"),
                    c("Dia T1", "Ctrl T0"),
                    c("PR_0 T0", "PR_1 T0"),
                    c("PR_0 T1", "PR_1 T1"),
                    c("PR_0 T0", "PR_0 T1"),
                    c("PR_1 T0", "PR_1 T1"),
                    c("Keto_1 T0", "Keto_0 T0"))

In [None]:
md_cd8  <- cd8_l1_full_filt_sub@meta.data  %>% 
group_by(Sample_ID, Condition, Condition2, Age, Sex, Ketoacidosis, Keto_Time)  %>% tally  %>% dplyr::select(-n)  %>% 
ungroup  %>% 
mutate(Sample_ID = as.factor(Sample_ID))

In [None]:
md_cd8$Sample_ID  <- paste0("g",md_cd8$Sample_ID)

In [None]:

md_cd8  <- left_join(aggexp_cd8@meta.data, md_cd8)
rownames(md_cd8)  <- colnames(aggexp_cd8)

### CD8 L1 dataset

In [None]:
aggexp_cd8@meta.data  <- md_cd8

In [None]:
# Pseudobulk differential expression (FindAllMarkers with DESeq2) between the
# two conditions stored in Conditions[[j]], computed on the global `aggexp_cd8`.
#
# j: index into the global list `Conditions`; each element is
#    c(Condition_1, Condition_2).
#
# Returns the FindAllMarkers table with added `source`, `test_type` and
# `dataset` columns, or NULL when no markers come back (bind_rows() skips
# NULL entries). Reads the globals `Conditions` and `aggexp_cd8`.

Create_Markers_DataFrame  <- function(j){
    print("#######################")
    print(j)

    Condition_1  <- Conditions[[j]][1]
    Condition_2  <- Conditions[[j]][2]

    seurat_obj  <- aggexp_cd8

    # "PR" and "Keto" comparisons live in their own metadata columns; copy the
    # relevant one into `Condition` so the rest of the function is uniform.
    if(grepl(Condition_1, pattern = "PR")){
      seurat_obj$Condition  <- seurat_obj$Condition2
    }

    if(grepl(Condition_1, pattern = "Keto")){
      seurat_obj$Condition  <- seurat_obj$Keto_Time
    }

    ## Keep only the samples that belong to the two compared conditions
    ds  <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) )
    print(ds$Condition  %>% table)

    ## scRNAseq - RNA
    DefaultAssay(ds)  <- "RNA"
    Idents(ds)  <- ds$Condition
    markers_sc  <- FindAllMarkers(ds, only.pos = TRUE, test.use = "DESeq2", return.thresh = 1)

    n_markers  <- if(is.null(markers_sc)) 0L else nrow(markers_sc)
    print(paste("DE RNA: ", n_markers))

    # A 0-row result cannot take the length-1 annotation columns below
    # (`$<-` on an empty data frame is an error), so it is skipped instead.
    if(n_markers == 0){
        return(NULL)
    }

    markers_sc$source  <- "scRNAseq_RNA"
    rownames(markers_sc)  <- NULL
    markers_sc$test_type  <- paste(Condition_1, "vs", Condition_2)
    print("..")
    markers_sc$dataset  <- "cd8_l1_full_filt"
    return(markers_sc)
}


In [None]:
# Run every comparison listed in `Conditions` (instead of a hard-coded 1:8)
# and stack the per-comparison tables into one data frame.
mrk  <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)

mrk  <- bind_rows(mrk)

In [None]:
mrk 

In [None]:
all_markers  <- mrk

### CD8 L2 datasets

In [None]:
l2_datasets  <- levels(factor(cd8_l1_full_filt_sub$annotations_l2))

In [None]:
l2_datasets

In [None]:
# Pseudobulk the CD8 object per sample and L2 annotation, restricted to the
# filtered gene set; TRUE is spelled out because `T` can be reassigned.
aggexp_cd8  <- AggregateExpression(cd8_l1_full_filt_sub, group.by = c("Sample_ID", "annotations_l2"), 
                                  features = genes_filt, return.seurat = TRUE, assay = "RNA")

In [None]:
aggexp_cd8$Sample_ID  %>% table

In [None]:
md_cd8  <- cd8_l1_full_filt_sub@meta.data  %>% 
group_by(Sample_ID, Condition, Condition2, Age, Sex, Ketoacidosis, Keto_Time)  %>% tally  %>% dplyr::select(-n)  %>% 
ungroup  %>% 
mutate(Sample_ID = as.factor(Sample_ID))

In [None]:
md_cd8$Sample_ID  <- paste0("g",md_cd8$Sample_ID)

In [None]:
md_cd8  <- left_join(aggexp_cd8@meta.data, md_cd8)
rownames(md_cd8)  <- colnames(aggexp_cd8)

In [None]:
aggexp_cd8@meta.data  <- md_cd8

In [None]:
aggexp_cd8@meta.data

In [None]:
# Pseudobulk differential expression (FindAllMarkers with DESeq2) between the
# two conditions stored in Conditions[[j]], restricted to one L2 population of
# the global `aggexp_cd8`.
#
# j: index into the global list `Conditions`; each element is
#    c(Condition_1, Condition_2).
#
# The population is chosen by the GLOBAL index `i` (set by the calling for
# loop): it is the i-th level of factor(aggexp_cd8$annotations_l2).
#
# Returns the FindAllMarkers table with added `source`, `test_type` and
# `dataset` columns, or NULL when no markers come back (bind_rows() skips
# NULL entries).

Create_Markers_DataFrame  <- function(j){
    print("#######################")
    print(j)

    Condition_1  <- Conditions[[j]][1]
    Condition_2  <- Conditions[[j]][2]

    # Resolve the population once; it is used for subsetting and labelling.
    target_population  <- levels(factor(aggexp_cd8$annotations_l2))[i]
    if(is.na(target_population)){
        stop("Global index `i` does not point to a level of aggexp_cd8$annotations_l2", call. = FALSE)
    }

    seurat_obj  <- subset(aggexp_cd8, annotations_l2 == target_population)

    # "PR" and "Keto" comparisons live in their own metadata columns; copy the
    # relevant one into `Condition` so the rest of the function is uniform.
    if(grepl(Condition_1, pattern = "PR")){
      seurat_obj$Condition  <- seurat_obj$Condition2
    }

    if(grepl(Condition_1, pattern = "Keto")){
      seurat_obj$Condition  <- seurat_obj$Keto_Time
    }

    ## Keep only the samples that belong to the two compared conditions
    ds  <- subset(seurat_obj, Condition %in% c(Condition_1, Condition_2) )
    print(ds$Condition  %>% table)

    ## scRNAseq - RNA
    DefaultAssay(ds)  <- "RNA"
    Idents(ds)  <- ds$Condition
    markers_sc  <- FindAllMarkers(ds, only.pos = TRUE, test.use = "DESeq2", return.thresh = 1)

    n_markers  <- if(is.null(markers_sc)) 0L else nrow(markers_sc)
    print(paste("DE RNA: ", n_markers))

    # A 0-row result cannot take the length-1 annotation columns below
    # (`$<-` on an empty data frame is an error), so it is skipped instead.
    if(n_markers == 0){
        return(NULL)
    }

    markers_sc$source  <- "scRNAseq_RNA"
    rownames(markers_sc)  <- NULL
    markers_sc$test_type  <- paste(Condition_1, "vs", Condition_2)
    print("..")
    markers_sc$dataset  <- target_population
    return(markers_sc)
}


In [None]:
# Run all condition comparisons inside every L2 population of `aggexp_cd8` and
# append the results to `all_markers`.
# `i` is deliberately a global loop variable: Create_Markers_DataFrame() reads
# it to pick the population. seq_along() keeps the loop from running when
# there are no populations (1:length() would iterate over c(1, 0)).
for(i in seq_along(levels(factor(aggexp_cd8$annotations_l2)))){

    print("######################################################################")
    if(i > 1){
        # Removes these names from the global environment if they exist there.
        suppressWarnings(rm(markers_sc, markers, markers1, markers_sc2, markers_sc_predia, markers_bulk, markers_bulk2))
    }

    mrk  <- map(.x = seq_along(Conditions), Create_Markers_DataFrame)

    mrk  <- bind_rows(mrk)

    # bind_rows() tolerates an empty `mrk` and differing column sets
    all_markers  <- bind_rows(all_markers, mrk)

}

In [None]:
all_markers  %>% dplyr::filter(p_val_adj<0.05)

In [None]:
all_markers


In [None]:
all_markers_cd8   <- all_markers

# Correlation DESeq and Wilcox

In [None]:
all_markers_cd8

In [None]:
all_fcs_cd8$dataset  %>% table

In [None]:
## Load fold changes calculated by Wilcox

In [None]:
paths_fc  <- list.files("../../240218_VN_Diabetes_V05/tables/fold_change", full.names = T)
dataset_names  <- list.files("../../240218_VN_Diabetes_V05/tables/fold_change", full.names = F)


In [None]:
paths_fc

In [None]:
paths_fc  <- paths_fc[which(grepl(paths_fc, pattern = "Ctrl")|grepl(paths_fc, pattern = "DiaT0_vs_DiaT1"))]

In [None]:
dataset_names  <- dataset_names[which(grepl(dataset_names, pattern = "Ctrl")|grepl(dataset_names, pattern = "DiaT0_vs_DiaT1"))]

In [None]:
dataset_names

In [None]:
paths_fc

In [None]:
# Read the i-th fold-change table listed in the global `paths_fc` and tag
# every row with the matching file name from the global `dataset_names`.
#
# i: position of the file in `paths_fc` / `dataset_names`.
# Returns a data.frame with an added `file` column.
fc_read  <- function(i){
    fc_table  <- as.data.frame(read_csv(paths_fc[i]))
    fc_table$file  <- dataset_names[i]
    fc_table
}

In [None]:
# seq_along() yields an empty index when no files matched, whereas
# 1:length() would try to read paths_fc[1] and paths_fc[0].
fc.dfs  <- map(seq_along(paths_fc), .f = fc_read)

In [None]:
fc.dfs2  <- bind_rows(fc.dfs)

In [None]:
fc.dfs2$file  %>% table

In [None]:
# Split each file name into the population and the comparison it holds.
# The "_" in front of the comparison name is turned into "." so that
# separate() can cut the name at that position.
fc.dfs3  <- fc.dfs2 %>%  
mutate(dataset = file)  %>% 
mutate(dataset = gsub(dataset, pattern = "_Ctrl_vs_DiaT0", replacement = ".Ctrl_vs_DiaT0"))  %>% 
mutate(dataset = gsub(dataset, pattern = "_Ctrl_vs_DiaT1", replacement = ".Ctrl_vs_DiaT1"))  %>% 
mutate(dataset = gsub(dataset, pattern = "_DiaT0_vs_DiaT1", replacement = ".DiaT0_vs_DiaT1"))  %>% 
# Only the first two "."-separated pieces are kept (population, test_type);
# `dataset` itself is retained. FALSE is spelled out because `F` can be reassigned.
separate(dataset, into = c("population", "test_type"), sep = "\\.", remove = FALSE)  %>% 
# Keep only rows whose `cluster` label contains "Dia"
dplyr::filter(grepl(x = cluster, pattern = "Dia")) 

### CD8 FC correlations

In [None]:
fc.dfs3

In [None]:
fc.dfs3.cd8  <- fc.dfs3  %>% dplyr::filter(grepl(population, pattern = "cd8"))

We see that the names of populations are different between DESeq FCs and Wilcox FCs. We need to fix that. 

In [None]:
fc.dfs3.cd8$population  %>% table

In [None]:
all_fcs_cd8$dataset  %>% table

In [None]:
colnames(all_fcs_cd8)

Now I will create a new column, `population`, containing a factor with the same levels as in `fc.dfs3.cd8`.

In [None]:
all_fcs_cd8  <- all_fcs_cd8  %>% mutate(population = recode_factor(dataset,
              "CD8 NK cells---NK cells" = "cd8_l2_nk",
              "CD8 T cells---Naive" = "cd8_l3_naive",
              "CD8 T cells---Proliferating" = "cd8_l3_prolif",
              "CD8 T cells---Tcm" = "cd8_l3_tcm",
              "CD8 T cells---Tem" = "cd8_l3_tem",
              "CD8 T cells---Temra" = "cd8_l3_temra",
              "CD8 Unconventional T cells---gd T cells" = "cd8_l2_subcluster",
              "CD8 Unconventional T cells---MAIT cells" = "cd8_l2_unc",
              "cd8_l1_full_filt" = "cd8_l1_full_filt"))

We also need to make sure that we will be comparing the same test types. 

In [None]:
all_fcs_cd8$test_type  %>% table

In [None]:
fc.dfs3.cd8$test_type  %>% table

For now we will only work with Ctrl and T1D comparisons. 

In [None]:
fc.dfs3.cd8  <- fc.dfs3.cd8  %>% 
mutate(test_type = recode_factor(test_type,
              "Ctrl_vs_DiaT0" = "Dia T0 vs Ctrl T0" ,
              "Ctrl_vs_DiaT1" = "Dia T1 vs Ctrl T0",
              "DiaT0_vs_DiaT1" = "Dia T0 vs Dia T1"))

Let's check that we have both positive and negative values for FCs in both tables. 

In [None]:
fc.dfs3.cd8$cluster  %>% table

In [None]:
all_fcs_cd8

In [None]:
fc.dfs3.cd8  %>% dplyr::filter(test_type == "Dia T0 vs Dia T1")

Correlations of FCs.

In [None]:
wilcox_fcs  <- fc.dfs3.cd8  %>% 
                dplyr::filter(!(test_type == "Dia T0 vs Dia T1" & cluster == "Dia T1"))  %>% 
                dplyr::select(gene, population, test_type, avg_log2FC_Wilcox = avg_log2FC)

DESeq_fcs  <- all_fcs_cd8  %>% dplyr::select(gene, population, test_type, avg_log2FC_DESeq = avg_log2FC)


In [None]:
all_joint_fcs  <- wilcox_fcs  %>% left_join(DESeq_fcs)

In [None]:
all_joint_fcs

In [None]:
options(repr.plot.width = 12, repr.plot.height = 20)
all_joint_fcs  %>% 
ggplot(aes(x = avg_log2FC_Wilcox, y = avg_log2FC_DESeq)) +
geom_point(alpha = 0.5, size = 0.5) +
facet_grid(cols = vars(test_type), rows = vars(population), scales = "free") +
ggpubr::stat_cor() +
geom_smooth(method = lm) +
ggtheme()

In [None]:
all_joint_fcs

In [None]:
cor_stats_cd8  <- all_joint_fcs  %>% 
dplyr::filter(!is.na(avg_log2FC_Wilcox) & !is.na(avg_log2FC_DESeq))  %>% 
group_by(population, test_type)  %>% 
rstatix::cor_test(avg_log2FC_Wilcox, avg_log2FC_DESeq)  

In [None]:
cor_stats_cd8

In [None]:
options(repr.plot.width = 12, repr.plot.height = 5)
cor_stats_cd8  %>% 
ggplot(aes(x = statistic, y = -log(p), color = population)) +
geom_point() +
facet_wrap(~test_type, ncol = 3)

###  CD4 FCS correlation

In [None]:
fc.dfs3.cd4  <- fc.dfs3  %>% dplyr::filter(grepl(population, pattern = "cd4"))

We see that the names of populations are different between DESeq FCs and Wilcox FCs. We need to fix that. 

In [None]:
fc.dfs3.cd4$population  %>% table

In [None]:
all_fcs_cd4$dataset  %>% table

In [None]:
colnames(all_fcs_cd4)

Now I will create a new column, `population`, containing a factor with the same levels as in `fc.dfs3.cd4`.

In [None]:
all_fcs_cd4  <- all_fcs_cd4  %>% mutate(population = recode_factor(dataset,
              "CD4 T cells---ISAGhi" = "cd4_l3_isaghi",
              "CD4 T cells---Naive" = "cd4_l3_naive",
              "CD4 T cells---Nfkb" = "cd4_l3_nfkb",
              "CD4 T cells---Proliferating" = "cd4_l3_proliferating",
              "CD4 T cells---Temra" = "cd4_l3_temra",
              "CD4 T cells---Tfh" = "cd4_l3_tfh",
              "CD4 T cells---Th1Th17" = "cd4_l3_th1th17",
              "CD4 T cells---Th2" = "cd4_l3_th2",
              "CD4 T cells---Treg" = "cd4_l3_treg",
              "CD4 Unconventional T cells---Unconventional" = "cd4_l2_unc",
              "cd4_l1_full_filt" = "cd4_l1_full_filt"
              ))

We also need to make sure that we will be comparing the same test types. 

In [None]:
all_fcs_cd4$test_type  %>% table

In [None]:
fc.dfs3.cd4$test_type  %>% table

For now we will only work with Ctrl and T1D comparisons. 

In [None]:
fc.dfs3.cd4  <- fc.dfs3.cd4  %>% 
mutate(test_type = recode_factor(test_type,
              "Ctrl_vs_DiaT0" = "Dia T0 vs Ctrl T0" ,
              "Ctrl_vs_DiaT1" = "Dia T1 vs Ctrl T0",
              "DiaT0_vs_DiaT1" = "Dia T0 vs Dia T1"))

Let's check that we have both positive and negative values for FCs in both tables. 

In [None]:
fc.dfs3.cd4$cluster  %>% table

In [None]:
all_fcs_cd4

In [None]:
fc.dfs3.cd4  %>% dplyr::filter(test_type == "Dia T0 vs Dia T1")

Correlations of FCs.

In [None]:
wilcox_fcs_cd4  <- fc.dfs3.cd4  %>% 
                dplyr::filter(!(test_type == "Dia T0 vs Dia T1" & cluster == "Dia T1"))  %>% 
                dplyr::select(gene, population, test_type, avg_log2FC_Wilcox = avg_log2FC)

DESeq_fcs_cd4  <- all_fcs_cd4  %>% dplyr::select(gene, population, test_type, avg_log2FC_DESeq = avg_log2FC)


In [None]:
all_joint_fcs_cd4  <- wilcox_fcs_cd4  %>% left_join(DESeq_fcs_cd4)

In [None]:
all_joint_fcs_cd4

In [None]:
options(repr.plot.width = 12, repr.plot.height = 20)
all_joint_fcs_cd4  %>% 
dplyr::filter(population != "cd4_l2_subcluster")  %>% 
ggplot(aes(x = avg_log2FC_Wilcox, y = avg_log2FC_DESeq)) +
geom_point(alpha = 0.5, size = 0.5) +
facet_grid(cols = vars(test_type), rows = vars(population), scales = "free") +
ggpubr::stat_cor() +
geom_smooth(method = lm) +
ggtheme()

In [None]:
cor_stats_cd4  <- all_joint_fcs_cd4  %>% 
dplyr::filter(!is.na(avg_log2FC_Wilcox) & !is.na(avg_log2FC_DESeq))  %>% 
group_by(population, test_type)  %>% 
rstatix::cor_test(avg_log2FC_Wilcox, avg_log2FC_DESeq)  

In [None]:
cor_stats_cd4

In [None]:
options(repr.plot.width = 12, repr.plot.height = 5)
cor_stats_cd4  %>% 
ggplot(aes(x = statistic, y = -log(p), color = population)) +
geom_point() +
facet_wrap(~test_type, ncol = 3)

In [None]:
ls()

# Main DEG genes in groups and by DESeq

## CD4

In [None]:
genes  <- rownames(cd4_l1_full_filt_sub@assays$RNA)

In [None]:
# Keep only genes whose name matches NONE of the exclusion patterns below.
# NOTE(review): "^MT" matches every symbol that begins with MT, not only the
# "MT-" prefixed ones (those are also caught by "\\-") - confirm this is wanted.
# NOTE(review): unlike the filters in the "DESeq FoldChanges" cells, this list
# has no "^IG[KLH]" pattern - confirm the difference is intentional.
genes_filt  <- genes[!Reduce(`|`, lapply(
    c("^MT", "\\.", "LINC", "^MIR", "HNRNP",
      "^RP[LS]", "\\-", "TTTY", "ORF", "orf"),
    function(excl_pattern) grepl(genes, pattern = excl_pattern)
))]

In [None]:
aggexp_cd4  <- AggregateExpression(cd4_l1_full_filt_sub, group.by = c("Sample_ID"), 
                                  features = genes_filt, return.seurat = T, assay = "RNA")

In [None]:
# One row per distinct Sample_ID / Condition / Condition2 / Age / Sex
# combination found in the cell-level metadata.
md_cd4 <- cd4_l1_full_filt_sub@meta.data %>%
  group_by(Sample_ID, Condition, Condition2, Age, Sex) %>%
  summarise(.groups = "drop") %>%
  mutate(Sample_ID = as.factor(Sample_ID))

In [None]:
# Prefix the sample IDs with "g" (presumably to match the sample names in the
# aggregated object - TODO confirm).
md_cd4[["Sample_ID"]] <- paste0("g", md_cd4[["Sample_ID"]])

In [None]:

# Add the sample annotation to the metadata of the aggregated object; the
# row names are set again from the object's column names after the join.
md_cd4 <- aggexp_cd4@meta.data %>% left_join(md_cd4)
rownames(md_cd4) <- colnames(aggexp_cd4)

In [None]:
# Store the annotated table as the metadata of the aggregated object.
aggexp_cd4@meta.data <- md_cd4

In [None]:
# Genes to display.
genes <- c("GZMB", "GZMA", "TNF", "LEF1", "TCF7", "GNLY", "CXCR4", "TNFAIP3")

In [None]:
# Rows of the "data" layer of the aggregated RNA assay that belong to the
# requested genes. drop = FALSE keeps a matrix even when only one gene
# matches, so the dimnames assignments in the next cell keep working.
df <- aggexp_cd4@assays$RNA@layers$data[
  rownames(aggexp_cd4@assays$RNA) %in% genes, ,
  drop = FALSE
]

In [None]:
# Label columns with the sample names and rows with the selected gene names
# (same selection as above, so the order matches).
colnames(df) <- colnames(aggexp_cd4)
rownames(df) <- rownames(aggexp_cd4@assays$RNA)[
  rownames(aggexp_cd4@assays$RNA) %in% genes
]

In [None]:
# Reshape to long format: one row per sample and gene.
df <- df %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("Sample_ID") %>%
  pivot_longer(!Sample_ID, names_to = "gene", values_to = "value")


In [None]:
df

In [None]:
# Attach the per-sample metadata (joined on the shared columns).
df <- df %>% left_join(md_cd4)

In [None]:
# Values in `df` by Condition, one panel per gene: violin, median crossbar
# and one point per row of `df`.
ggplot(df, aes(x = Condition, y = value)) +
  geom_violin(aes(fill = Condition), scale = "width", alpha = 0.4) +
  stat_summary(
    fun = "median", geom = "crossbar",
    width = 0.75, color = "grey30"
  ) +
  geom_beeswarm(
    aes(fill = Condition),
    shape = 21, color = "black", size = 3, cex = 3, corral = "random"
  ) +
  # Only the first and second group on the x axis are compared.
  stat_compare_means(comparisons = list(c(1, 2)), label = "p.format") +
  facet_wrap(~gene, scales = "free", ncol = 4) +
  theme_classic() +
  ggtheme() +
  scale_fill_manual(
    values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")
  ) +
  # NOTE(review): no layer maps the colour aesthetic, so this scale looks
  # unused - confirm before removing.
  scale_color_manual(
    values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")
  ) +
  labs(x = "", y = "") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

## CD8

In [None]:
# All feature names of the RNA assay of the CD8 object.
genes <- rownames(cd8_l1_full_filt_sub@assays$RNA)

In [None]:
# Remove every gene whose symbol matches any of the exclusion patterns
# (one alternation instead of a chain of grepl() calls; the pattern "^MT" was
# listed twice in that chain).
# NOTE(review): "^MT" excludes every symbol that starts with "MT", not only
# "MT-" genes - confirm this is intended.
genes_filt <- genes[!grepl(
  pattern = paste(
    c(
      "^MT", "\\.", "LINC", "^MIR", "HNRNP",
      "^RP[LS]", "\\-", "TTTY", "ORF", "orf"
    ),
    collapse = "|"
  ),
  x = genes
)]

In [None]:
# Aggregate the RNA assay per Sample_ID, restricted to the filtered genes,
# and return the result as a Seurat object.
# `TRUE` replaces the reassignable shorthand `T`, and `assays` is the full
# argument name that `assay` was partially matching.
aggexp_cd8 <- AggregateExpression(
  cd8_l1_full_filt_sub,
  assays = "RNA",
  features = genes_filt,
  group.by = "Sample_ID",
  return.seurat = TRUE
)

In [None]:
# One row per distinct Sample_ID / Condition / Condition2 / Age / Sex
# combination found in the cell-level metadata.
md_cd8 <- cd8_l1_full_filt_sub@meta.data %>%
  group_by(Sample_ID, Condition, Condition2, Age, Sex) %>%
  summarise(.groups = "drop") %>%
  mutate(Sample_ID = as.factor(Sample_ID))

In [None]:
# Prefix the sample IDs with "g" (presumably to match the sample names in the
# aggregated object - TODO confirm).
md_cd8[["Sample_ID"]] <- paste0("g", md_cd8[["Sample_ID"]])

In [None]:

# Add the sample annotation to the metadata of the aggregated object; the
# row names are set again from the object's column names after the join.
md_cd8 <- aggexp_cd8@meta.data %>% left_join(md_cd8)
rownames(md_cd8) <- colnames(aggexp_cd8)

In [None]:
# Store the annotated table as the metadata of the aggregated object.
aggexp_cd8@meta.data <- md_cd8

In [None]:
# Genes to display.
genes <- c("GZMB", "GZMA", "TNF", "LEF1", "TCF7", "GNLY", "CXCR4", "TNFAIP3")

In [None]:
# Rows of the "data" layer of the aggregated RNA assay that belong to the
# requested genes. drop = FALSE keeps a matrix even when only one gene
# matches, so the dimnames assignments in the next cell keep working.
df <- aggexp_cd8@assays$RNA@layers$data[
  rownames(aggexp_cd8@assays$RNA) %in% genes, ,
  drop = FALSE
]

In [None]:
# Label columns with the sample names and rows with the selected gene names
# (same selection as above, so the order matches).
colnames(df) <- colnames(aggexp_cd8)
rownames(df) <- rownames(aggexp_cd8@assays$RNA)[
  rownames(aggexp_cd8@assays$RNA) %in% genes
]

In [None]:
# Reshape to long format: one row per sample and gene.
df <- df %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("Sample_ID") %>%
  pivot_longer(!Sample_ID, names_to = "gene", values_to = "value")


In [None]:
df

In [None]:
# Attach the per-sample metadata (joined on the shared columns).
df <- df %>% left_join(md_cd8)

In [None]:
# Values in `df` by Condition, one panel per gene: violin, median crossbar
# and one point per row of `df`.
ggplot(df, aes(x = Condition, y = value)) +
  geom_violin(aes(fill = Condition), scale = "width", alpha = 0.4) +
  stat_summary(
    fun = "median", geom = "crossbar",
    width = 0.75, color = "grey30"
  ) +
  geom_beeswarm(
    aes(fill = Condition),
    shape = 21, color = "black", size = 3, cex = 3, corral = "random"
  ) +
  # Only the first and second group on the x axis are compared.
  stat_compare_means(comparisons = list(c(1, 2)), label = "p.format") +
  facet_wrap(~gene, scales = "free", ncol = 4) +
  theme_classic() +
  ggtheme() +
  scale_fill_manual(
    values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")
  ) +
  # NOTE(review): no layer maps the colour aesthetic, so this scale looks
  # unused - confirm before removing.
  scale_color_manual(
    values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")
  ) +
  labs(x = "", y = "") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

In [None]:
# CD8 markers of "Dia T0 vs Ctrl T0" in the full filtered dataset, sorted by
# raw p-value.
all_markers_cd8 %>%
  dplyr::filter(
    test_type == "Dia T0 vs Ctrl T0" & dataset == "cd8_l1_full_filt"
  ) %>%
  arrange(p_val)

In [None]:
# Gene names of the same comparison, ordered by adjusted p-value.
mrk_t0 <- all_markers_cd8 %>%
  dplyr::filter(
    test_type == "Dia T0 vs Ctrl T0" & dataset == "cd8_l1_full_filt"
  ) %>%
  arrange(p_val_adj) %>%
  pull(gene)

In [None]:
# CD8 markers of "Dia T1 vs Ctrl T0", sorted by raw p-value.
all_markers_cd8 %>%
  dplyr::filter(
    test_type == "Dia T1 vs Ctrl T0" & dataset == "cd8_l1_full_filt"
  ) %>%
  arrange(p_val)

In [None]:
# Gene names of "Dia T1 vs Ctrl T0", ordered by adjusted p-value.
mrk_t1 <- all_markers_cd8 %>%
  dplyr::filter(
    test_type == "Dia T1 vs Ctrl T0" & dataset == "cd8_l1_full_filt"
  ) %>%
  arrange(p_val_adj) %>%
  pull(gene)

In [None]:
# Keep at most the first 200 genes of each ranking. head() is used instead
# of `[1:200]`, which pads vectors shorter than 200 with NA and would let NA
# show up in the intersection below.
mrk_t0 <- head(mrk_t0, 200)
mrk_t1 <- head(mrk_t1, 200)

In [None]:
# Genes present in both truncated rankings.
intersect(mrk_t0, mrk_t1)

In [None]:
# Genes to display. "PPP3R1" previously carried a trailing space
# ("PPP3R1 "), so it could never match a row name and was silently skipped.
genes <- c(
  "STIM1", "STIM2", "ORAI2", "ORAI3", "TRPC1", "RCAN1",
  "PPP3CA", "PPP3R1", "NFATC1", "NFATC2", "NFATC3"
)

In [None]:
# Rows of the "data" layer of the aggregated RNA assay that belong to the
# requested genes. drop = FALSE keeps a matrix even when only one gene
# matches, so the dimnames assignments in the next cell keep working.
df <- aggexp_cd8@assays$RNA@layers$data[
  rownames(aggexp_cd8@assays$RNA) %in% genes, ,
  drop = FALSE
]

In [None]:
# Label columns with the sample names and rows with the selected gene names
# (same selection as above, so the order matches).
colnames(df) <- colnames(aggexp_cd8)
rownames(df) <- rownames(aggexp_cd8@assays$RNA)[
  rownames(aggexp_cd8@assays$RNA) %in% genes
]

In [None]:
# Reshape to long format: one row per sample and gene.
df <- df %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("Sample_ID") %>%
  pivot_longer(!Sample_ID, names_to = "gene", values_to = "value")


In [None]:
df

In [None]:
# Attach the per-sample metadata (joined on the shared columns).
df <- df %>% left_join(md_cd8)

In [None]:
# Values in `df` by Condition, one panel per gene: violin, median crossbar
# and one point per row of `df`.
ggplot(df, aes(x = Condition, y = value)) +
  geom_violin(aes(fill = Condition), scale = "width", alpha = 0.4) +
  stat_summary(
    fun = "median", geom = "crossbar",
    width = 0.75, color = "grey30"
  ) +
  geom_beeswarm(
    aes(fill = Condition),
    shape = 21, color = "black", size = 3, cex = 3, corral = "random"
  ) +
  # Only the first and second group on the x axis are compared.
  stat_compare_means(comparisons = list(c(1, 2)), label = "p.format") +
  facet_wrap(~gene, scales = "free", ncol = 4) +
  theme_classic() +
  ggtheme() +
  scale_fill_manual(
    values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")
  ) +
  # NOTE(review): no layer maps the colour aesthetic, so this scale looks
  # unused - confirm before removing.
  scale_color_manual(
    values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")
  ) +
  labs(x = "", y = "") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

In [None]:
# Gene names of "Dia T0 vs Ctrl T0" in the full filtered CD8 dataset,
# ordered by adjusted p-value.
mrk_t0 <- all_markers_cd8 %>%
  dplyr::filter(
    test_type == "Dia T0 vs Ctrl T0" & dataset == "cd8_l1_full_filt"
  ) %>%
  arrange(p_val_adj) %>%
  pull(gene)

In [None]:
# CD8 markers of "Dia T1 vs Ctrl T0", sorted by raw p-value.
all_markers_cd8 %>%
  dplyr::filter(
    test_type == "Dia T1 vs Ctrl T0" & dataset == "cd8_l1_full_filt"
  ) %>%
  arrange(p_val)

In [None]:
# NOTE(review): this ranking uses "Dia T0 vs Dia T1", whereas the table
# displayed in the previous cell is "Dia T1 vs Ctrl T0" - confirm which
# comparison mrk_t1 is meant to hold.
mrk_t1 <- all_markers_cd8 %>%
  dplyr::filter(
    test_type == "Dia T0 vs Dia T1" & dataset == "cd8_l1_full_filt"
  ) %>%
  arrange(p_val_adj) %>%
  pull(gene)

In [None]:
# Keep at most the first 200 genes of each ranking. head() is used instead
# of `[1:200]`, which pads vectors shorter than 200 with NA and would let NA
# show up in the intersection below.
mrk_t0 <- head(mrk_t0, 200)
mrk_t1 <- head(mrk_t1, 200)

In [None]:
# Genes present in both truncated rankings.
intersect(mrk_t0, mrk_t1)

In [None]:
# Genes to display.
genes <- c("ORAI1", "NFATC2", "NFATC3")

In [None]:
# Rows of the "data" layer of the aggregated RNA assay that belong to the
# requested genes. drop = FALSE keeps a matrix even when only one gene
# matches, so the dimnames assignments in the next cell keep working.
df <- aggexp_cd8@assays$RNA@layers$data[
  rownames(aggexp_cd8@assays$RNA) %in% genes, ,
  drop = FALSE
]

In [None]:
# Label columns with the sample names and rows with the selected gene names
# (same selection as above, so the order matches).
colnames(df) <- colnames(aggexp_cd8)
rownames(df) <- rownames(aggexp_cd8@assays$RNA)[
  rownames(aggexp_cd8@assays$RNA) %in% genes
]

In [None]:
# Reshape to long format: one row per sample and gene.
df <- df %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("Sample_ID") %>%
  pivot_longer(!Sample_ID, names_to = "gene", values_to = "value")


In [None]:
df

In [None]:
# Attach the per-sample metadata (joined on the shared columns).
df <- df %>% left_join(md_cd8)

In [None]:
# Values in `df` by Condition, one panel per gene: violin, median crossbar
# and one point per row of `df`.
options(repr.plot.width = 9, repr.plot.height = 4)

ggplot(df, aes(x = Condition, y = value)) +
  geom_violin(aes(fill = Condition), scale = "width", alpha = 0.4) +
  stat_summary(
    fun = "median", geom = "crossbar",
    width = 0.75, color = "grey30"
  ) +
  geom_beeswarm(
    aes(fill = Condition),
    shape = 21, color = "black", size = 3, cex = 3, corral = "random"
  ) +
  # Only the first and second group on the x axis are compared.
  stat_compare_means(comparisons = list(c(1, 2)), label = "p.format") +
  facet_wrap(~gene, scales = "free", ncol = 4) +
  theme_classic() +
  ggtheme() +
  scale_fill_manual(
    values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")
  ) +
  # NOTE(review): no layer maps the colour aesthetic, so this scale looks
  # unused - confirm before removing.
  scale_color_manual(
    values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")
  ) +
  labs(x = "", y = "") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

In [None]:
# Rows of the "data" layer of the aggregated CD4 RNA assay that belong to the
# genes currently stored in `genes`. drop = FALSE keeps a matrix even when
# only one gene matches, so the dimnames assignments in the next cell keep
# working.
df <- aggexp_cd4@assays$RNA@layers$data[
  rownames(aggexp_cd4@assays$RNA) %in% genes, ,
  drop = FALSE
]

In [None]:
# Label columns with the sample names and rows with the selected gene names
# (same selection as above, so the order matches).
colnames(df) <- colnames(aggexp_cd4)
rownames(df) <- rownames(aggexp_cd4@assays$RNA)[
  rownames(aggexp_cd4@assays$RNA) %in% genes
]

In [None]:
# Reshape to long format: one row per sample and gene.
df <- df %>%
  t() %>%
  as.data.frame() %>%
  rownames_to_column("Sample_ID") %>%
  pivot_longer(!Sample_ID, names_to = "gene", values_to = "value")


In [None]:
df

In [None]:
# Attach the per-sample metadata (joined on the shared columns).
df <- df %>% left_join(md_cd4)

In [None]:
# Values in `df` by Condition, one panel per gene: violin, median crossbar
# and one point per row of `df`.
ggplot(df, aes(x = Condition, y = value)) +
  geom_violin(aes(fill = Condition), scale = "width", alpha = 0.4) +
  stat_summary(
    fun = "median", geom = "crossbar",
    width = 0.75, color = "grey30"
  ) +
  geom_beeswarm(
    aes(fill = Condition),
    shape = 21, color = "black", size = 3, cex = 3, corral = "random"
  ) +
  # Only the first and second group on the x axis are compared.
  stat_compare_means(comparisons = list(c(1, 2)), label = "p.format") +
  facet_wrap(~gene, scales = "free", ncol = 4) +
  theme_classic() +
  ggtheme() +
  scale_fill_manual(
    values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")
  ) +
  # NOTE(review): no layer maps the colour aesthetic, so this scale looks
  # unused - confirm before removing.
  scale_color_manual(
    values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")
  ) +
  labs(x = "", y = "") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

In [None]:
# List the names of the objects currently defined in the workspace.
ls()

In [None]:
# Number of CD8 cells per experiment.
cd8_l1_full_filt$Experiment_ID %>% table()

In [None]:
# Number of distinct patients per experiment and disease (CD8 object):
# first collapse to one row per patient, then count rows per group.
cd8_l1_full_filt@meta.data %>%
  group_by(Experiment_ID, Disease, Patient_ID) %>%
  summarise(.groups = "drop_last") %>%
  summarise(n = n(), .groups = "drop_last")

In [None]:
# Number of distinct patients per experiment and condition (CD8 object),
# with one column per condition.
cd8_l1_full_filt@meta.data %>%
  group_by(Experiment_ID, Condition, Patient_ID) %>%
  summarise(.groups = "drop_last") %>%
  summarise(n = n(), .groups = "drop_last") %>%
  pivot_wider(names_from = Condition, values_from = n)

In [None]:
# Same table for the CD4 object.
cd4_l1_full_filt@meta.data %>%
  group_by(Experiment_ID, Condition, Patient_ID) %>%
  summarise(.groups = "drop_last") %>%
  summarise(n = n(), .groups = "drop_last") %>%
  pivot_wider(names_from = Condition, values_from = n)

In [None]:
# Gene list used for the module score. It was a bare comma-separated list of
# strings, which is not valid R; it is now a character vector. The four
# empty-string placeholders that followed "WDR86-AS1" were dropped because
# they cannot name a gene. The AddModuleScore() cells below carry their own
# copy of this list.
eff_module_genes <- c(
  "ACTN4", "ADAM19", "AHR", "AKIRIN2", "ALCAM", "ALOX5AP", "SLF1", "ANTXR2",
  "ANXA1", "ANXA2", "ANXA2P1", "ANXA4", "AQP3", "ARHGAP18", "ASB2", "ATP2B4",
  "ATXN1", "B3GNT9", "BTG3", "SMCO4", "LACC1", "MIR22HG", "SMIM29", "CALHM2",
  "CAPG", "CASK", "CAST", "CBLL1", "CCDC107", "CCL5", "CCR2", "CCR6", "CD226",
  "CD28", "CD58", "CD63", "CD74", "CD84", "CDC42EP3", "CDK2AP2", "CHST7",
  "CLDND1", "CLIC1", "CLU", "CNPPD1", "COTL1", "CPPED1", "CREB3L2", "CRELD2",
  "CRIP1", "CRYBG3", "CTSA", "CTSC", "CXCR3", "DNAI2", "DUSP16", "DUSP5",
  "EFHD2", "AGO4", "EIF3A", "EIF4EBP2", "ELOVL5", "EPS15", "EVI2B", "NIBAN1",
  "ZC2HC1A", "PIEZO1", "FAR2", "FAS", "FBXL8", "FRMD4B", "GBP3", "GCLM",
  "GDPD5", "GLIPR1", "GOLGA7", "GSTK1", "GZMA", "GZMK", "HLA-DPA1", "HMGN4",
  "JPT1", "HNRNPLL", "HOPX", "IFI16", "IFI27", "IFNG", "IL10RA", "IL15RA",
  "IQGAP1", "IQGAP2", "ITGB1", "MATCAP1", "KIF1B", "KIF21A", "KLRB1", "LDHA",
  "LGALS1", "LGALS3", "LIMS1", "LIMS3", "WDR86-AS1", "MAP3K5", "MDFIC",
  "MFHAS1", "MIAT", "MIB1", "MICAL2", "MIS18BP1", "MLF1", "AFDN", "MTSS1",
  "MYBL1", "MYL6", "MYO1F", "NCAPH", "NCF4", "NETO2", "NINJ2", "NMU", "NOD2",
  "NPC1", "NPDC1", "NABP1", "NABP2", "OGDH", "OGFRL1", "OSBPL3", "PAM",
  "PDIA6", "PEA15", "PFKL", "PHACTR2", "PHTF2", "PLXNC1", "PPIF", "PPP1CA",
  "PREX1", "PRR5L", "PTTG1", "NECTIN3", "RAB27A", "REEP3", "REEP5", "RFTN1",
  "RGS3", "RHOU", "RNF126", "RNF149", "RORA", "S100A11", "S100A4", "SAP30",
  "SEC11C", "SH2D1A", "SH3BGRL3", "SH3BP5", "SLC2A3", "SLC35D2", "NHERF1",
  "SMAP1", "SPAG1", "SPOPL", "SRGN", "SSR3", "ST8SIA1", "STOM", "STX11",
  "TBCB", "TBX21", "TIGIT", "TLR3", "TMEM116", "TMEM156", "TMEM200A", "TMEM64",
  "TMX4", "TNF", "TNFRSF4", "TOR3A", "TP53INP1", "TPM4", "TRAC", "TRADD",
  "TTC39C", "TTYH2", "TXN", "TYMP", "UBL3", "USP46", "UST", "VCL", "YWHAH",
  "ZBTB38", "ZC3HAV1L", "ZNF532"
)


In [None]:
# Score the gene module on the aggregated CD4 object; the result is stored in
# the metadata column "Eff_Module_Score1". The four empty-string entries that
# followed "WDR86-AS1" in the feature list were removed: they cannot match a
# feature of the object.
aggexp_cd4 <- AddModuleScore(
  aggexp_cd4,
  ctrl = 50,
  name = "Eff_Module_Score",
  features = list(c(
    "ACTN4", "ADAM19", "AHR", "AKIRIN2", "ALCAM", "ALOX5AP", "SLF1", "ANTXR2",
    "ANXA1", "ANXA2", "ANXA2P1", "ANXA4", "AQP3", "ARHGAP18", "ASB2", "ATP2B4",
    "ATXN1", "B3GNT9", "BTG3", "SMCO4", "LACC1", "MIR22HG", "SMIM29", "CALHM2",
    "CAPG", "CASK", "CAST", "CBLL1", "CCDC107", "CCL5", "CCR2", "CCR6", "CD226",
    "CD28", "CD58", "CD63", "CD74", "CD84", "CDC42EP3", "CDK2AP2", "CHST7",
    "CLDND1", "CLIC1", "CLU", "CNPPD1", "COTL1", "CPPED1", "CREB3L2", "CRELD2",
    "CRIP1", "CRYBG3", "CTSA", "CTSC", "CXCR3", "DNAI2", "DUSP16", "DUSP5",
    "EFHD2", "AGO4", "EIF3A", "EIF4EBP2", "ELOVL5", "EPS15", "EVI2B", "NIBAN1",
    "ZC2HC1A", "PIEZO1", "FAR2", "FAS", "FBXL8", "FRMD4B", "GBP3", "GCLM",
    "GDPD5", "GLIPR1", "GOLGA7", "GSTK1", "GZMA", "GZMK", "HLA-DPA1", "HMGN4",
    "JPT1", "HNRNPLL", "HOPX", "IFI16", "IFI27", "IFNG", "IL10RA", "IL15RA",
    "IQGAP1", "IQGAP2", "ITGB1", "MATCAP1", "KIF1B", "KIF21A", "KLRB1", "LDHA",
    "LGALS1", "LGALS3", "LIMS1", "LIMS3", "WDR86-AS1", "MAP3K5", "MDFIC",
    "MFHAS1", "MIAT", "MIB1", "MICAL2", "MIS18BP1", "MLF1", "AFDN", "MTSS1",
    "MYBL1", "MYL6", "MYO1F", "NCAPH", "NCF4", "NETO2", "NINJ2", "NMU", "NOD2",
    "NPC1", "NPDC1", "NABP1", "NABP2", "OGDH", "OGFRL1", "OSBPL3", "PAM",
    "PDIA6", "PEA15", "PFKL", "PHACTR2", "PHTF2", "PLXNC1", "PPIF", "PPP1CA",
    "PREX1", "PRR5L", "PTTG1", "NECTIN3", "RAB27A", "REEP3", "REEP5", "RFTN1",
    "RGS3", "RHOU", "RNF126", "RNF149", "RORA", "S100A11", "S100A4", "SAP30",
    "SEC11C", "SH2D1A", "SH3BGRL3", "SH3BP5", "SLC2A3", "SLC35D2", "NHERF1",
    "SMAP1", "SPAG1", "SPOPL", "SRGN", "SSR3", "ST8SIA1", "STOM", "STX11",
    "TBCB", "TBX21", "TIGIT", "TLR3", "TMEM116", "TMEM156", "TMEM200A",
    "TMEM64", "TMX4", "TNF", "TNFRSF4", "TOR3A", "TP53INP1", "TPM4", "TRAC",
    "TRADD", "TTC39C", "TTYH2", "TXN", "TYMP", "UBL3", "USP46", "UST", "VCL",
    "YWHAH", "ZBTB38", "ZC3HAV1L", "ZNF532"
  ))
)

In [None]:
# Module score per Condition; only the first and second group are compared.
VlnPlot(aggexp_cd4, features = "Eff_Module_Score1", group.by = "Condition") +
  ggpubr::stat_compare_means(comparisons = list(c(1, 2)))

In [None]:
# Score and Condition of every column of the aggregated object, for plotting.
df <- data.frame(
  score = aggexp_cd4$Eff_Module_Score1,
  Condition = aggexp_cd4$Condition
)

In [None]:
# Scores in `df` by Condition: violin, median crossbar and one point per row.
ggplot(df, aes(x = Condition, y = score)) +
  geom_violin(aes(fill = Condition), scale = "width", alpha = 0.4) +
  stat_summary(
    fun = "median", geom = "crossbar",
    width = 0.75, color = "grey30"
  ) +
  geom_beeswarm(
    aes(fill = Condition),
    shape = 21, color = "black", size = 3, cex = 3, corral = "random"
  ) +
  # Only the first and second group on the x axis are compared.
  stat_compare_means(comparisons = list(c(1, 2)), label = "p.format") +
  theme_classic() +
  ggtheme() +
  scale_fill_manual(
    values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")
  ) +
  # NOTE(review): no layer maps the colour aesthetic, so this scale looks
  # unused - confirm before removing.
  scale_color_manual(
    values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")
  ) +
  labs(x = "", y = "") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )


In [None]:
# Score the gene module on the aggregated CD8 object; the result is stored in
# the metadata column "Eff_Module_Score1". The four empty-string entries that
# followed "WDR86-AS1" in the feature list were removed: they cannot match a
# feature of the object.
aggexp_cd8 <- AddModuleScore(
  aggexp_cd8,
  ctrl = 50,
  name = "Eff_Module_Score",
  features = list(c(
    "ACTN4", "ADAM19", "AHR", "AKIRIN2", "ALCAM", "ALOX5AP", "SLF1", "ANTXR2",
    "ANXA1", "ANXA2", "ANXA2P1", "ANXA4", "AQP3", "ARHGAP18", "ASB2", "ATP2B4",
    "ATXN1", "B3GNT9", "BTG3", "SMCO4", "LACC1", "MIR22HG", "SMIM29", "CALHM2",
    "CAPG", "CASK", "CAST", "CBLL1", "CCDC107", "CCL5", "CCR2", "CCR6", "CD226",
    "CD28", "CD58", "CD63", "CD74", "CD84", "CDC42EP3", "CDK2AP2", "CHST7",
    "CLDND1", "CLIC1", "CLU", "CNPPD1", "COTL1", "CPPED1", "CREB3L2", "CRELD2",
    "CRIP1", "CRYBG3", "CTSA", "CTSC", "CXCR3", "DNAI2", "DUSP16", "DUSP5",
    "EFHD2", "AGO4", "EIF3A", "EIF4EBP2", "ELOVL5", "EPS15", "EVI2B", "NIBAN1",
    "ZC2HC1A", "PIEZO1", "FAR2", "FAS", "FBXL8", "FRMD4B", "GBP3", "GCLM",
    "GDPD5", "GLIPR1", "GOLGA7", "GSTK1", "GZMA", "GZMK", "HLA-DPA1", "HMGN4",
    "JPT1", "HNRNPLL", "HOPX", "IFI16", "IFI27", "IFNG", "IL10RA", "IL15RA",
    "IQGAP1", "IQGAP2", "ITGB1", "MATCAP1", "KIF1B", "KIF21A", "KLRB1", "LDHA",
    "LGALS1", "LGALS3", "LIMS1", "LIMS3", "WDR86-AS1", "MAP3K5", "MDFIC",
    "MFHAS1", "MIAT", "MIB1", "MICAL2", "MIS18BP1", "MLF1", "AFDN", "MTSS1",
    "MYBL1", "MYL6", "MYO1F", "NCAPH", "NCF4", "NETO2", "NINJ2", "NMU", "NOD2",
    "NPC1", "NPDC1", "NABP1", "NABP2", "OGDH", "OGFRL1", "OSBPL3", "PAM",
    "PDIA6", "PEA15", "PFKL", "PHACTR2", "PHTF2", "PLXNC1", "PPIF", "PPP1CA",
    "PREX1", "PRR5L", "PTTG1", "NECTIN3", "RAB27A", "REEP3", "REEP5", "RFTN1",
    "RGS3", "RHOU", "RNF126", "RNF149", "RORA", "S100A11", "S100A4", "SAP30",
    "SEC11C", "SH2D1A", "SH3BGRL3", "SH3BP5", "SLC2A3", "SLC35D2", "NHERF1",
    "SMAP1", "SPAG1", "SPOPL", "SRGN", "SSR3", "ST8SIA1", "STOM", "STX11",
    "TBCB", "TBX21", "TIGIT", "TLR3", "TMEM116", "TMEM156", "TMEM200A",
    "TMEM64", "TMX4", "TNF", "TNFRSF4", "TOR3A", "TP53INP1", "TPM4", "TRAC",
    "TRADD", "TTC39C", "TTYH2", "TXN", "TYMP", "UBL3", "USP46", "UST", "VCL",
    "YWHAH", "ZBTB38", "ZC3HAV1L", "ZNF532"
  ))
)

In [None]:
# Module score per Condition; only the first and second group are compared.
VlnPlot(aggexp_cd8, features = "Eff_Module_Score1", group.by = "Condition") +
  ggpubr::stat_compare_means(comparisons = list(c(1, 2)))

In [None]:
# Score and Condition of every column of the aggregated object, for plotting.
df <- data.frame(
  score = aggexp_cd8$Eff_Module_Score1,
  Condition = aggexp_cd8$Condition
)

In [None]:
# Scores in `df` by Condition: violin, median crossbar and one point per row.
ggplot(df, aes(x = Condition, y = score)) +
  geom_violin(aes(fill = Condition), scale = "width", alpha = 0.4) +
  stat_summary(
    fun = "median", geom = "crossbar",
    width = 0.75, color = "grey30"
  ) +
  geom_beeswarm(
    aes(fill = Condition),
    shape = 21, color = "black", size = 3, cex = 3, corral = "random"
  ) +
  # Only the first and second group on the x axis are compared.
  stat_compare_means(comparisons = list(c(1, 2)), label = "p.format") +
  theme_classic() +
  ggtheme() +
  scale_fill_manual(
    values = c("#1874cdff", "#ee6363ff", "#c41515ff", "#eeb4b4ff")
  ) +
  # NOTE(review): no layer maps the colour aesthetic, so this scale looks
  # unused - confirm before removing.
  scale_color_manual(
    values = c("dodgerblue3", "#e54c4cff", "#aa2a2aff", "#e88989ff")
  ) +
  labs(x = "", y = "") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )