---

title: "All cells scVI integrated"

author:

- name: Paula Nieto

  affiliation:

  - Centro Nacional de Análisis Genómico (CNAG)

  email: paula.nieto@cnag.crg.eu

date: '`r format(Sys.Date(), "%B %d, %Y")`'

output:

  html_document:

    toc: true

    toc_float: true

---

In [None]:
# report-wide chunk defaults: code is not echoed, printed results, warnings
# and messages are hidden, and any displayed code is tidied
knitr::opts_chunk$set(
  tidy = TRUE,
  message = FALSE,
  warning = FALSE,
  results = "hide",
  echo = FALSE
)

In [None]:
# load the shared helper script from whichever location exists on this
# machine (cluster path first, then the mapped S: drive).
# The path is chosen by checking for the file, so an error raised *inside*
# utils.R is reported as-is instead of silently triggering the fallback.
utils_paths <- c(
  "/scratch_isilon/groups/singlecell/pnieto/scripts/r_utils/utils.R",
  "S:/scripts/r_utils/utils.R"
)
utils_found <- utils_paths[file.exists(utils_paths)]
if (length(utils_found) == 0) {
  stop(
    "utils.R not found in any of: ", paste(utils_paths, collapse = ", "),
    call. = FALSE
  )
}
source(utils_found[1])
library(dittoSeq)

In [None]:
# project folder layout; every path below is derived from `root`
# NOTE(review): get_root_dir() and glue() are not defined in this cell --
# presumably they are provided by the sourced utils.R; confirm
root <- get_root_dir()
proj_dir <- glue("{root}/CSF")
data_dir <- glue("{proj_dir}/output/integration/All cells")
# outputs are written to the same folder the inputs are read from
out_dir <- data_dir

In [None]:
# create the output folder if it doesn't exist yet.
# `recursive = TRUE` is needed because `out_dir` is several levels deep:
# without it dir.create() only warns and creates nothing when a parent
# folder is missing.
if (!dir.exists(out_dir)) {
  dir.create(out_dir, recursive = TRUE)
}

In [None]:
# load the object stored in All_cells_annotated_updated.rds under `data_dir`
data <- readRDS(glue("{data_dir}/All_cells_annotated_updated.rds"))
# Idents(data) <- "leiden_res_0.05"

In [None]:
# ---- color palettes ----
# unnamed palettes: colors are matched to groups by position
pal_patients <- as.vector(pals::polychrome())
pal_projects <- as.vector(pals::alphabet())
pal_disease <- as.vector(pals::trubetskoy())[1:4]
pal_clusters <- c(
  ggsci::pal_d3("category20")(20),
  ggsci::pal_d3("category20b")(20)
)
pal_gene_exp <- c("#ADD8E633", "#E46726")

# named palettes: each color is paired with an annotation label, in the order
# the labels first appear in the object's metadata
pal_i_annot <- setNames(
  c("green4", "#d6604d", "orange2", "purple4", "#4393c3"),
  unique(data$i_annot)
)

pal_i_annot_2 <- setNames(
  as.vector(pals::trubetskoy())[5:22],
  unique(data$i_annot_2)
)

pal_lv2_annot <- setNames(
  c(as.vector(pals::polychrome()), "black", "#f4f592", "forestgreen"),
  unique(data$lv2_annot)
)

In [None]:
# recode "Proliferative" cluster: proliferative cells are folded back into
# the lineage named in their level-2 label
data$i_annot[data$lv2_annot == "Macrophages proliferative"] <- "Myeloid"
data$i_annot_2[data$lv2_annot == "Macrophages proliferative"] <- "Macrophages"

data$i_annot[data$lv2_annot == "T cells proliferative"] <- "T cells"
# the label must match the one used on the line above; the previous spelling
# ("T cellss proliferative") matched no cell, so i_annot_2 was never updated
data$i_annot_2[data$lv2_annot == "T cells proliferative"] <- "CD8"

# cells flagged CD45- are non-immune at both annotation levels
data$i_annot[data$general == "CD45-"] <- "Non-immune"
data$i_annot_2[data$general == "CD45-"] <- "Non-immune"

# Integration

In [None]:
# start from the merged object and compute the uncorrected embedding
data <- readRDS(glue("{data_dir}/all_cells_merged.rds"))

data <- data %>%
  NormalizeData() %>%
  FindVariableFeatures(nfeatures = 3000) %>%
  ScaleData(verbose = FALSE) %>%
  RunPCA(verbose = FALSE) %>%
  RunUMAP(dims = 1:20, verbose = FALSE) %>%
  # the second run only creates the "umap_scvi" reduction; the file later
  # overwrites its embeddings with coordinates read from the scVI UMAP CSV.
  # The key has no inner underscore so it matches the "UMAPscVI_1/2" column
  # names given to those coordinates.
  RunUMAP(
    dims = 1:20,
    verbose = FALSE,
    reduction.key = "UMAPscVI_",
    reduction.name = "umap_scvi"
  )

# fix mistaken patient
data$patient[data$library == 3885] <- "P12"
data$patient_sample <- paste(data$patient, data$sample, sep = "_")

In [None]:
# disease group of each patient; patients not listed keep the label "none"
disease_patients <- list(
  "Lymphoma" = c("P01", "P06", "P10", "P11", "P15"),
  "Glioblastoma" = c("P02", "P05"),
  "Brain met" = c("P03", "P08", "P09", "P12", "P16"),
  "Inflammatory" = c("P04", "P07", "P13", "P14")
)

data$disease <- "none"
for (disease_label in names(disease_patients)) {
  in_group <- data$patient %in% disease_patients[[disease_label]]
  data$disease[in_group] <- disease_label
}

In [None]:
# Read the CSV file with the clusters from scVI (first column = cell barcode)
clusters <- read.csv(glue("{data_dir}/All_cells_scVI_clusters.csv"))
rownames(clusters) <- clusters$X
clusters$X <- NULL
# add clusters metadata to seu obj
data <- AddMetaData(data, metadata = clusters)

# Read the scVI UMAP coordinates (first column = cell barcode)
umap <- read.csv(glue("{data_dir}/All_cells_scVI_UMAP.csv"))
rownames(umap) <- umap$X
umap$X <- NULL
colnames(umap) <- c("UMAPscVI_1", "UMAPscVI_2")
umap <- as.matrix(umap)

# the slot is assigned directly, so nothing re-orders the rows for us:
# check every cell has coordinates and put them in the object's cell order
cells_without_umap <- setdiff(colnames(data), rownames(umap))
if (length(cells_without_umap) > 0) {
  stop(
    length(cells_without_umap),
    " cells in the object have no scVI UMAP coordinates",
    call. = FALSE
  )
}
data@reductions$umap_scvi@cell.embeddings <-
  umap[colnames(data), , drop = FALSE]
Idents(data) <- "leiden_res_0.05"

Uncorrected UMAP

In [None]:
# uncorrected UMAP colored by patient sample and by project
p_by_sample <- DimPlot(
  data,
  reduction = "umap",
  group.by = "patient_sample",
  cols = pal_patients,
  pt.size = 1,
  shuffle = TRUE
)
p_by_project <- DimPlot(
  data,
  reduction = "umap",
  group.by = "project",
  cols = pal_projects,
  pt.size = 1,
  shuffle = TRUE
)
p_by_sample + p_by_project

scVI corrected UMAP

In [None]:
# scVI UMAP colored by patient sample and by project
p_by_sample <- DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "patient_sample",
  cols = pal_patients,
  pt.size = 1,
  shuffle = TRUE
)
p_by_project <- DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "project",
  cols = pal_projects,
  pt.size = 1,
  shuffle = TRUE
)
p_by_sample + p_by_project

# Clustering

In [None]:
# scVI UMAP with the leiden_res_0.05 clusters labelled on the plot
DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "leiden_res_0.05",
  label = TRUE,
  cols = pal_clusters,
  pt.size = 1
)

In [None]:
# the marker files go into a "markers" sub-folder; neither saveRDS() nor
# write.xlsx() creates it, so make sure it exists before the long computation
markers_dir <- glue("{out_dir}/markers")
if (!dir.exists(markers_dir)) {
  dir.create(markers_dir, recursive = TRUE)
}

# positive markers for every leiden_res_0.05 cluster
Idents(data) <- "leiden_res_0.05"
markers <- FindAllMarkers(
  object = data,
  only.pos = TRUE,
  max.cells.per.ident = 1000
)

# save file
saveRDS(markers, glue("{out_dir}/markers/marker_genes_res0.05_scVI_seu.rds"))
# write to excel file (one sheet per cluster)
openxlsx::write.xlsx(
  split(markers, markers$cluster),
  file = glue("{out_dir}/markers/marker_genes_res0.05_scVI_seu.xlsx"),
  overwrite = TRUE
)

In [None]:
# expression of a few lineage genes on the scVI UMAP
# (the trailing comma after the last argument was removed: it passed an
# extra, empty argument to the call)
FeaturePlot(
  data,
  features = c("CD3E", "CD3D", "CD4", "CD8B", "CD68", "MS4A1"),
  order = TRUE,
  pt.size = 1,
  ncol = 3,
  label = TRUE,
  reduction = "umap_scvi",
  cols = pal_gene_exp
)

# Annotation

Original automatic annotation

In [None]:
# scVI UMAP colored by the "auto_annot" metadata column
# (the trailing comma after the last argument was removed: it passed an
# extra, empty argument to the call)
DimPlot(
  data,
  group.by = "auto_annot",
  cols = as.vector(pals::polychrome()),
  pt.size = 1,
  shuffle = TRUE,
  reduction = "umap_scvi"
)

## CD45+/- annotation

In [None]:
# PTPRC expression on the scVI UMAP
# (the trailing comma after the last argument was removed: it passed an
# extra, empty argument to the call)
FeaturePlot(
  data,
  features = c("PTPRC"),
  # pt.size = 1,
  # label = TRUE,
  reduction = "umap_scvi",
  cols = pal_gene_exp
)

In [None]:
# label cells as CD45+ / CD45- and store the result in `general`
# NOTE(review): "CD45+" is not a metadata column created in this file, so
# this presumably gives every cell the identity "CD45+" -- confirm
Idents(data) <- "CD45+"
# cells picked on the PTPRC feature plot get the identity "CD45-"
# NOTE(review): CellSelector presumably needs an interactive session, so this
# cell cannot run while knitting -- confirm
data <- CellSelector(plot = FeaturePlot(data, "PTPRC", order = TRUE, reduction = "umap_scvi"), object = data, ident = "CD45-")
# keep the labels as a plain character metadata column
data$general <- Idents(data)
data$general <- as.character(data$general)

In [None]:
# scVI UMAP colored by the CD45+/CD45- label stored in `general`
DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "general"
)

## General annotation

In [None]:
# bring in the annotation of the immune-cell object; cells that end up
# without a level-2 label are dropped from `data`
immune_rds <- glue(
  "{proj_dir}/output/integration/Immune cells/Immune_cells_annotated_filtered_lv2.rds"
)
immune <- readRDS(immune_rds)@meta.data

annot_cols <- c("i_annot_2", "i_annot", "lv2_annot")
immune$barcode <- rownames(immune)
immune <- immune[, c("barcode", annot_cols)]
rownames(immune) <- immune$barcode

# annotations are matched to cells through the barcode row names
data <- AddMetaData(data, immune[, annot_cols])
data$lv2_annot <- as.character(data$lv2_annot)
table(is.na(data$lv2_annot))

# CD45- cells get an explicit label at level 2
is_cd45_neg <- data$general == "CD45-"
data$lv2_annot[is_cd45_neg] <- "Non-immune"
table(is.na(data$lv2_annot))

# keep only cells that have a level-2 label, then save
has_lv2 <- !is.na(data@meta.data[, "lv2_annot"])
data <- data[, has_lv2]
saveRDS(data, glue("{data_dir}/All_cells_annotated_filtered_lv2.rds"))

In [None]:
# load Immune cells to filter out bad quality cells
# NOTE(review): this cell repeats the preceding cell line for line (same
# input file, same filtering, same output file); it looks like an accidental
# copy and is a candidate for removal -- confirm before deleting
immune <- readRDS(glue("{proj_dir}/output/integration/Immune cells/Immune_cells_annotated_filtered_lv2.rds"))@meta.data

# keep the barcode plus the three annotation columns
immune$barcode <- rownames(immune)
immune <- immune[, c("barcode",  "i_annot_2", "i_annot", "lv2_annot")]

# add the annotation columns to `data`, matched through the barcode row names
rownames(immune) <- immune$barcode
data <- AddMetaData(data, immune[, c("i_annot_2", "i_annot", "lv2_annot")])
data$lv2_annot <- as.character(data$lv2_annot)
table(is.na(data$lv2_annot))

# CD45- cells get an explicit level-2 label
data$lv2_annot[data$general == "CD45-"] <- "Non-immune"
table(is.na(data$lv2_annot))

# drop cells that still have no level-2 label, then save
data <- data[, !is.na(data@meta.data[, "lv2_annot"])]
saveRDS(data, glue("{data_dir}/All_cells_annotated_filtered_lv2.rds"))

In [None]:
# scVI UMAP with the leiden_res_1 clusters labelled on the plot
DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "leiden_res_1",
  label = TRUE,
  cols = pal_clusters,
  pt.size = 1
)

In [None]:
# cluster ids are matched as text against the lookup table below
data$leiden_res_1 <- as.character(data$leiden_res_1)

# lookup table: one row per leiden_res_1 cluster (0 to 24) with its general
# (i_annot) and more detailed (i_annot_2) label
cluster_ids <- as.character(c(0:24))
general_labels <- c(
  "T cells", "T cells", "T cells", "T cells", "Myeloid",
  "B cells", "Myeloid", "T cells", "T cells", "T cells",
  "T cells", "Myeloid", "T cells", "B cells", "T cells",
  "Myeloid", "Myeloid", "T cells", "Proliferative", "B cells",
  "B cells", "Myeloid", "Non-immune", "Myeloid", "Myeloid"
)
detailed_labels <- c(
  "CD8", "CD4", "CD4", "CD4", "Macrophages",
  "B cells", "Monocytes", "CD4", "CD4", "NK",
  "CD8", "DC", "CD8", "B cells", "CD8",
  "Monocytes", "DC", "CD4", "Proliferative", "B cells",
  "B cells", "DC", "Non-immune", "DC", "DC"
)
df_annot <- data.frame(
  "leiden_res_1" = cluster_ids,
  "i_annot" = general_labels,
  "i_annot_2" = detailed_labels
)

# assign annotation to i_annot: join the labels onto the per-cell table by
# cluster id, then restore the barcodes as row names so that AddMetaData can
# match rows to cells
data$barcode <- rownames(data@meta.data)
df_meta <- data@meta.data[, c("barcode", "leiden_res_1")]
df_meta <- merge(df_meta, df_annot, all.x = TRUE)
rownames(df_meta) <- df_meta$barcode

data <- AddMetaData(data, metadata = df_meta[, c("i_annot", "i_annot_2")])
data$i_annot <- as.character(data$i_annot)
data$i_annot_2 <- as.character(data$i_annot_2)
saveRDS(data, glue("{data_dir}/All_cells_annotated_filtered_lv2.rds"))

In [None]:
# scVI UMAP colored by both annotation levels, with the number of cells in
# the caption
p_general <- DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "i_annot",
  cols = pal_i_annot,
  pt.size = 1
)
p_detailed <- DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "i_annot_2",
  cols = pal_i_annot_2,
  pt.size = 1
)
p_both <- p_general + p_detailed +
  plot_annotation(caption = glue("{ncol(data)} cells"))
# `&` applies the theme to every panel of the patchwork
p_both & theme(text = element_text(size = 20))

In [None]:
# scVI UMAP colored by disease group
DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "disease",
  cols = pal_disease,
  shuffle = TRUE,
  pt.size = 1
)

In [None]:
# disease composition of each i_annot_2 group (proportions)
p1 <- table(data@meta.data[, c("i_annot_2", "disease")]) %>%
  as.data.frame() %>%
  ggplot(aes(fill = disease, y = Freq, x = i_annot_2)) +
  geom_bar(position = "fill", stat = "identity") +
  theme_classic() +
  labs(x = "Cluster", y = "Proportion", fill = "Disease") +
  scale_fill_manual(values = pal_disease) +
  RotatedAxis()

# number of cells per i_annot_2 group; x axis hidden because it is shared
# with the proportion plot
p2 <- table(data@meta.data[, c("i_annot_2")]) %>%
  as.data.frame() %>%
  ggplot(aes(y = Freq, x = Var1)) +
  geom_bar(stat = "identity") +
  theme_minimal_hgrid() +
  labs(y = "Number of cells") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )

# `ncol = 1` stacks the two panels so that `heights` (one value per row)
# actually applies and the count bars sit above the matching proportion bars
(p2 + p1) +
  plot_layout(ncol = 1, heights = c(4, 10)) &
  theme(text = element_text(size = 12))

In [None]:
# disease composition of each i_annot group (proportions)
p1 <- table(data@meta.data[, c("i_annot", "disease")]) %>%
  as.data.frame() %>%
  ggplot(aes(fill = disease, y = Freq, x = i_annot)) +
  geom_bar(position = "fill", stat = "identity") +
  theme_classic() +
  labs(x = "Cluster", y = "Proportion", fill = "Disease") +
  scale_fill_manual(values = pal_disease) +
  RotatedAxis()

# number of cells per i_annot group; x axis hidden because it is shared with
# the proportion plot
p2 <- table(data@meta.data[, c("i_annot")]) %>%
  as.data.frame() %>%
  ggplot(aes(y = Freq, x = Var1)) +
  geom_bar(stat = "identity") +
  theme_minimal_hgrid() +
  labs(y = "Number of cells") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )

# `ncol = 1` stacks the two panels so that `heights` (one value per row)
# actually applies and the count bars sit above the matching proportion bars
(p2 + p1) +
  plot_layout(ncol = 1, heights = c(4, 10)) &
  theme(text = element_text(size = 20))

In [None]:
# cell counts per i_annot label, disease and sample; only combinations that
# actually occur are kept, sorted by disease
t <- table(data@meta.data[, c("i_annot", "disease", "patient_sample")]) %>%
  as.data.frame() %>%
  filter(Freq > 0) %>%
  mutate_if(is.factor, as.character) %>%
  arrange(disease)
# samples are ordered on the x axis by disease
sample_levels <- t[, c("patient_sample", "disease")] %>%
  unique() %>%
  pull(patient_sample)
t$patient_sample <- factor(t$patient_sample, levels = sample_levels)

# bar at the top: one tile per sample, colored by disease
p1 <- ggplot(t, aes(y = -1, fill = disease, x = patient_sample)) +
  geom_tile() +
  scale_fill_manual(values = pal_disease) +
  theme_void() +
  labs(fill = "Disease")

# actual barplot: cell-type proportions per sample
p2 <- ggplot(t, aes(fill = i_annot, y = Freq, x = patient_sample)) +
  geom_bar(position = "fill", stat = "identity") +
  theme_classic() +
  labs(x = "Patient", y = "Proportion", fill = "Cell type") +
  scale_fill_manual(values = pal_i_annot) +
  RotatedAxis()

# total number of cells per sample, in the same sample order
t2 <- table(data$patient_sample) %>%
  as.data.frame()
t2$Var1 <- factor(t2$Var1, levels = sample_levels)

p3 <- ggplot(t2, aes(y = Freq, x = Var1)) +
  geom_bar(stat = "identity") +
  theme_minimal_hgrid() +
  labs(y = "Number of cells") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )

# patchwork: `ncol = 1` stacks the three panels so that `heights` (one value
# per row) applies and the panels share the sample axis
(p3 + p1 + p2) +
  plot_layout(ncol = 1, heights = c(4, 1, 10), guides = "collect") &
  theme(legend.justification = "left")

In [None]:
# cell counts per i_annot_2 label, disease and sample; only combinations that
# actually occur are kept, sorted by disease
t <- table(data@meta.data[, c("i_annot_2", "disease", "patient_sample")]) %>%
  as.data.frame() %>%
  filter(Freq > 0) %>%
  mutate_if(is.factor, as.character) %>%
  arrange(disease)
# samples are ordered on the x axis by disease
sample_levels <- t[, c("patient_sample", "disease")] %>%
  unique() %>%
  pull(patient_sample)
t$patient_sample <- factor(t$patient_sample, levels = sample_levels)

# bar at the top: one tile per sample, colored by disease
p1 <- ggplot(t, aes(y = -1, fill = disease, x = patient_sample)) +
  geom_tile() +
  scale_fill_manual(values = pal_disease) +
  theme_void() +
  labs(fill = "Disease")

# actual barplot: cell-type proportions per sample
p2 <- ggplot(t, aes(fill = i_annot_2, y = Freq, x = patient_sample)) +
  geom_bar(position = "fill", stat = "identity") +
  theme_classic() +
  labs(x = "Patient", y = "Proportion", fill = "Cell type") +
  scale_fill_manual(values = pal_i_annot_2) +
  RotatedAxis()

# total number of cells per sample, in the same sample order
t2 <- table(data$patient_sample) %>%
  as.data.frame()
t2$Var1 <- factor(t2$Var1, levels = sample_levels)

p3 <- ggplot(t2, aes(y = Freq, x = Var1)) +
  geom_bar(stat = "identity") +
  theme_minimal_hgrid() +
  labs(y = "Number of cells") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  )

# patchwork: `ncol = 1` stacks the three panels so that `heights` (one value
# per row) applies and the panels share the sample axis
(p3 + p1 + p2) +
  plot_layout(ncol = 1, heights = c(4, 1, 10), guides = "collect") &
  theme(legend.justification = "left")

In [None]:
# the marker files go into a "markers" sub-folder; neither saveRDS() nor
# write.xlsx() creates it, so make sure it exists before the long computation
markers_dir <- glue("{out_dir}/markers")
if (!dir.exists(markers_dir)) {
  dir.create(markers_dir, recursive = TRUE)
}

# positive markers for every i_annot_2 group
Idents(data) <- "i_annot_2"
markers <- FindAllMarkers(
  object = data,
  only.pos = TRUE,
  max.cells.per.ident = 500
)

# save file
saveRDS(markers, glue("{out_dir}/markers/all_cells_annotation_markers.rds"))
# write to excel file (one sheet per group)
openxlsx::write.xlsx(
  split(markers, markers$cluster),
  file = glue("{out_dir}/markers/all_cells_annotation_markers.xlsx"),
  overwrite = TRUE
)

### Kruskal-Wallis Test

In [None]:
# scVI UMAP colored by i_annot, one panel per disease
DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "i_annot",
  split.by = "disease",
  ncol = 2,
  cols = pal_i_annot,
  shuffle = TRUE,
  pt.size = 1
)

In [None]:
# cell counts per i_annot label, disease and sample (observed combinations
# only), sorted by disease
t <- table(data@meta.data[, c("i_annot", "disease", "patient_sample")]) %>%
  as.data.frame() %>%
  filter(Freq > 0) %>%
  mutate_if(is.factor, as.character) %>%
  arrange(disease)
sample_levels <- t[, c("patient_sample", "disease")] %>%
  unique() %>%
  pull(patient_sample)
t$patient_sample <- factor(t$patient_sample, levels = sample_levels)

# proportion of each label within its sample
t$total_cells <- ave(t$Freq, t$patient_sample, FUN = sum)
t$prop <- t$Freq / t$total_cells

# every pair of disease groups is compared
my_comparisons <- list(
  c("Lymphoma", "Glioblastoma"),
  c("Lymphoma", "Inflammatory"),
  c("Lymphoma", "Brain met"),
  c("Inflammatory", "Glioblastoma"),
  c("Brain met", "Glioblastoma"),
  c("Inflammatory", "Brain met")
)

# one facet per label; the x labels are hidden because the fill legend
# already identifies the disease
ggboxplot(
  t,
  x = "disease", y = "prop",
  fill = "disease", add = "jitter",
  facet.by = "i_annot", short.panel.labs = TRUE
) +
  stat_compare_means(comparisons = my_comparisons, label = "p.signif") +
  scale_fill_manual(values = pal_disease) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

In [None]:
library(reshape2)
library(RColorBrewer)
library(gplots)

# Create a pivot table to have rows as patients and columns as cell types
data_pivot <- dcast(t, patient_sample ~ i_annot, value.var = "prop")
# a cell type absent from a sample has no row in `t`, which leaves NA in the
# pivot; such a type has proportion 0 (same fill as in the lv2_annot heatmap)
data_pivot[is.na(data_pivot)] <- 0

# Disease of each patient sample, in the row order of the pivot table
disease_annotations <- t[match(data_pivot$patient_sample, t$patient_sample), "disease"]

# Generate a color palette for the heatmap
heatmap_colors <- colorRampPalette(rev(brewer.pal(9, "RdYlBu")))(50)

# one side color per row, indexed by the alphabetical order of the diseases
colSide <- pal_disease[1:4][as.numeric(as.factor(disease_annotations))]
rownames(data_pivot) <- data_pivot$patient_sample
heatmap(as.matrix(data_pivot[, -1]), RowSideColors = colSide, col = heatmap_colors)#, Colv = NA)

***

In [None]:
# scVI UMAP colored by i_annot_2, one panel per disease
DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "i_annot_2",
  split.by = "disease",
  ncol = 2,
  cols = pal_i_annot_2,
  shuffle = TRUE,
  pt.size = 1
)

In [None]:
# cell counts per i_annot_2 label, disease and sample (observed combinations
# only), sorted by disease
t <- table(data@meta.data[, c("i_annot_2", "disease", "patient_sample")]) %>%
  as.data.frame() %>%
  filter(Freq > 0) %>%
  mutate_if(is.factor, as.character) %>%
  arrange(disease)
sample_levels <- t[, c("patient_sample", "disease")] %>%
  unique() %>%
  pull(patient_sample)
t$patient_sample <- factor(t$patient_sample, levels = sample_levels)

# proportion of each label within its sample
t$total_cells <- ave(t$Freq, t$patient_sample, FUN = sum)
t$prop <- t$Freq / t$total_cells

# every pair of disease groups is compared
my_comparisons <- list(
  c("Lymphoma", "Glioblastoma"),
  c("Lymphoma", "Inflammatory"),
  c("Lymphoma", "Brain met"),
  c("Inflammatory", "Glioblastoma"),
  c("Brain met", "Glioblastoma"),
  c("Inflammatory", "Brain met")
)

# one facet per label; the x labels are hidden because the fill legend
# already identifies the disease
ggboxplot(
  t,
  x = "disease", y = "prop",
  fill = "disease", add = "jitter",
  facet.by = "i_annot_2", short.panel.labs = TRUE
) +
  stat_compare_means(comparisons = my_comparisons, label = "p.signif") +
  scale_fill_manual(values = pal_disease) +
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())

In [None]:
# Create a pivot table to have rows as patients and columns as cell types
data_pivot <- dcast(t, patient_sample ~ i_annot_2, value.var = "prop")
# a cell type absent from a sample has no row in `t`, which leaves NA in the
# pivot; such a type has proportion 0 (same fill as in the lv2_annot heatmap)
data_pivot[is.na(data_pivot)] <- 0

# Disease of each patient sample, in the row order of the pivot table
disease_annotations <- t[match(data_pivot$patient_sample, t$patient_sample), "disease"]

# Generate a color palette for the heatmap
heatmap_colors <- colorRampPalette(rev(brewer.pal(9, "RdYlBu")))(50)

# one side color per row, indexed by the alphabetical order of the diseases
colSide <- pal_disease[1:4][as.numeric(as.factor(disease_annotations))]
rownames(data_pivot) <- data_pivot$patient_sample
heatmap(as.matrix(data_pivot[, -1]), RowSideColors = colSide, col = heatmap_colors)#, Colv = NA)

### scCODA

In [None]:
# scCODA result table for the i_annot_2 annotation
sccoda <- read_csv(glue("{data_dir}/All_cellsi_annot_2_scCODA.csv"))

# rename covariate comparison (contrasts): each label reads
# "<level> v <Treatment reference>"
contrast_labels <- c(
  "C(disease, Treatment('Lymphoma'))T.Brain met" = "BM v L",
  "C(disease, Treatment('Lymphoma'))T.Glioblastoma" = "G v L",
  "C(disease, Treatment('Lymphoma'))T.Inflammatory" = "I v L",
  "C(disease, Treatment('Glioblastoma'))T.Brain met" = "BM v G",
  "C(disease, Treatment('Glioblastoma'))T.Inflammatory" = "I v G",
  "C(disease, Treatment('Glioblastoma'))T.Lymphoma" = "L v G",
  "C(disease, Treatment('Brain met'))T.Glioblastoma" = "G v BM",
  "C(disease, Treatment('Brain met'))T.Inflammatory" = "I v BM",
  "C(disease, Treatment('Brain met'))T.Lymphoma" = "L v BM",
  "C(disease, Treatment('Inflammatory'))T.Brain met" = "BM v I",
  "C(disease, Treatment('Inflammatory'))T.Glioblastoma" = "G v I",
  "C(disease, Treatment('Inflammatory'))T.Lymphoma" = "L v I"
)
sccoda <- sccoda %>%
  mutate(comparison = recode(Covariate, !!!contrast_labels))

# filter out duplicate comparisons: each pair of diseases is listed in both
# directions, only one direction is kept (this is also the plotting order)
kept_contrasts <- c("BM v I", "L v I", "G v I", "BM v L", "G v L", "BM v G")
sccoda <- sccoda %>%
  filter(comparison %in% kept_contrasts)
sccoda$comparison <- factor(sccoda$comparison, levels = kept_contrasts)

# an effect is flagged "True" when its final parameter is different from 0
sccoda <- sccoda %>%
  mutate(significant = if_else(`Final Parameter` == 0, "False", "True"))

In [None]:
# dot plot of the scCODA results: color = log2 fold change, size = whether
# the effect was flagged as significant
sccoda %>%
  ggplot(aes(
    x = comparison,
    y = `Cell Type`,
    size = significant,
    color = as.numeric(`log2-fold change`)
  )) +
  geom_point() +
  theme_classic() +
  RotatedAxis() +
  scale_color_gradient2(
    low = "#3B99B1FF",
    mid = "#EACB2BFF",
    high = "#F5191CFF"
  ) +
  # sizes are named so that "True" is always the large dot, even when only
  # one of the two values is present in the table
  scale_size_manual(values = c("False" = 4, "True" = 12)) +
  labs(title = "scCODA results", x = "Comparison", color = "Log2 FC", size = "Significant") +
  theme(text = element_text(size = 18))

In [None]:
# scCODA result table for the i_annot annotation
sccoda <- read_csv(glue("{data_dir}/All_cellsi_annot_scCODA.csv"))

# rename covariate comparison (contrasts): each label reads
# "<level> v <Treatment reference>"
contrast_labels <- c(
  "C(disease, Treatment('Lymphoma'))T.Brain met" = "BM v L",
  "C(disease, Treatment('Lymphoma'))T.Glioblastoma" = "G v L",
  "C(disease, Treatment('Lymphoma'))T.Inflammatory" = "I v L",
  "C(disease, Treatment('Glioblastoma'))T.Brain met" = "BM v G",
  "C(disease, Treatment('Glioblastoma'))T.Inflammatory" = "I v G",
  "C(disease, Treatment('Glioblastoma'))T.Lymphoma" = "L v G",
  "C(disease, Treatment('Brain met'))T.Glioblastoma" = "G v BM",
  "C(disease, Treatment('Brain met'))T.Inflammatory" = "I v BM",
  "C(disease, Treatment('Brain met'))T.Lymphoma" = "L v BM",
  "C(disease, Treatment('Inflammatory'))T.Brain met" = "BM v I",
  "C(disease, Treatment('Inflammatory'))T.Glioblastoma" = "G v I",
  "C(disease, Treatment('Inflammatory'))T.Lymphoma" = "L v I"
)
sccoda <- sccoda %>%
  mutate(comparison = recode(Covariate, !!!contrast_labels))

# filter out duplicate comparisons: each pair of diseases is listed in both
# directions, only one direction is kept (this is also the plotting order)
kept_contrasts <- c("BM v I", "L v I", "G v I", "BM v L", "G v L", "BM v G")
sccoda <- sccoda %>%
  filter(comparison %in% kept_contrasts)
sccoda$comparison <- factor(sccoda$comparison, levels = kept_contrasts)

# an effect is flagged "True" when its final parameter is different from 0
sccoda <- sccoda %>%
  mutate(significant = if_else(`Final Parameter` == 0, "False", "True"))

In [None]:
# dot plot of the scCODA results: color = log2 fold change, size = whether
# the effect was flagged as significant
sccoda %>%
  ggplot(aes(
    x = comparison,
    y = `Cell Type`,
    size = significant,
    color = as.numeric(`log2-fold change`)
  )) +
  geom_point() +
  theme_classic() +
  RotatedAxis() +
  scale_color_gradient2(
    low = "#3B99B1FF",
    mid = "#EACB2BFF",
    high = "#F5191CFF"
  ) +
  # sizes are named so that "True" is always the large dot, even when only
  # one of the two values is present in the table
  scale_size_manual(values = c("False" = 4, "True" = 12)) +
  labs(title = "scCODA results", x = "Comparison", color = "Log2 FC", size = "Significant") +
  theme(text = element_text(size = 18))

***

## Level 2 annotation

In [None]:
# export the raw counts and the cell metadata as comma-separated text
# NOTE(review): with row.names = TRUE and col.names = TRUE the header line
# has one field fewer than the data lines -- confirm the reader expects that
counts_file <- glue("{out_dir}/integrated_All_cells_counts.csv")
metadata_file <- glue("{out_dir}/integrated_All_cells_metadata.csv")

# save counts (converted to a dense matrix before writing)
counts_dense <- as.matrix(GetAssayData(object = data, slot = "counts"))
write.table(
  counts_dense,
  counts_file,
  sep = ",", row.names = TRUE, col.names = TRUE, quote = FALSE
)

# save metadata
write.table(
  data@meta.data,
  metadata_file,
  sep = ",", row.names = TRUE, col.names = TRUE, quote = FALSE
)

In [None]:
# scVI UMAP colored by the level-2 annotation; the legend is a single column
# with enlarged keys
p_lv2 <- DimPlot(
  data,
  reduction = "umap_scvi",
  group.by = "lv2_annot",
  cols = pal_lv2_annot,
  shuffle = TRUE,
  pt.size = 1
)
p_lv2 +
  theme(text = element_text(size = 20)) +
  guides(color = guide_legend(ncol = 1, override.aes = list(size = 5)))

In [None]:
# cell counts per lv2_annot label, disease and sample (observed combinations
# only), sorted by disease
t <- table(data@meta.data[, c("lv2_annot", "disease", "patient_sample")]) %>%
  as.data.frame() %>%
  filter(Freq > 0) %>%
  mutate_if(is.factor, as.character) %>%
  arrange(disease)
sample_levels <- t[, c("patient_sample", "disease")] %>%
  unique() %>%
  pull(patient_sample)
t$patient_sample <- factor(t$patient_sample, levels = sample_levels)

# proportion of each label within its sample
t$total_cells <- ave(t$Freq, t$patient_sample, FUN = sum)
t$prop <- t$Freq / t$total_cells

# pivot table: one row per sample, one column per cell type; labels that do
# not occur in a sample get proportion 0
data_pivot <- dcast(t, patient_sample ~ lv2_annot, value.var = "prop")
data_pivot[is.na(data_pivot)] <- 0

# disease of each sample, in the row order of the pivot table
row_idx <- match(data_pivot$patient_sample, t$patient_sample)
disease_annotations <- t[row_idx, "disease"]

# color ramp for the heatmap body
heatmap_colors <- colorRampPalette(rev(brewer.pal(9, "RdYlBu")))(50)

# one side color per row, indexed by the alphabetical order of the diseases
colSide <- pal_disease[1:4][as.numeric(as.factor(disease_annotations))]
rownames(data_pivot) <- data_pivot$patient_sample
heatmap(
  as.matrix(data_pivot[, -1]),
  RowSideColors = colSide,
  col = heatmap_colors,
  margins = c(9, 5),
  cexRow = 0.5,
  cexCol = 0.5
) # , Colv = NA)

In [None]:
data <- readRDS(glue("{data_dir}/All_cells_annotated_no_neu.rds"))
# work on plain character labels from the start: writing a new label into a
# factor column would turn it into NA when the label is not already a level
data$lv2_annot <- as.character(data$lv2_annot)
# group malignant cells
data$lv2_annot[data$lv2_annot %in% c("B cell Malignant III", "B cell Malignant I", "B cell Malignant II")] <- "B cell Malignant"
# drop neutrophils
data <- subset(data, lv2_annot == "Neutrophils", invert = TRUE)
# recode myeloid annotation (labels not listed here are left unchanged)
# NOTE(review): 'Macrphages IFN producing' is kept as written because it has
# to match the label stored in the object -- confirm that spelling
data$lv2_annot <- recode(
  data$lv2_annot,
  'Border TAMs' = "BAMs",
  'DC CD1C' = "DC2",
  'DC ITGAX' = "DC5",
  'DC non-inflammatory CD1C' = "DC2",
  'DC1 CLEC9A' = "DC1",
  'DCs mreg LAMP3' = "DC mreg",
  'Macrphages IFN producing' = "Macrophages IFN signaling",
  'Microglia TAM' = "Microglia-like",
  'pvBAMs MRC1hi' = "BAMs"
)
saveRDS(data, glue("{data_dir}/All_cells_annotated_updated.rds"))

In [None]:
# for Sam: reduced copy of the object for sharing
# remove sequential samples
sequential_samples <- c("S07", "S15", "S21", "S18")
data <- subset(data, sample %in% sequential_samples, invert = TRUE)

# metadata columns to keep, written as <exported name> = <current name>
shared_cols <- c(
  nCount_RNA = "nCount_RNA",
  nFeature_RNA = "nFeature_RNA",
  percent.mt = "percent.mt",
  project = "project",
  sample = "sample",
  disease = "disease",
  patient = "patient",
  general_annot = "i_annot",
  lv1_annot = "i_annot_2",
  lv2_annot = "lv2_annot"
)
data@meta.data <- data@meta.data[, unname(shared_cols)]
colnames(data@meta.data) <- names(shared_cols)

saveRDS(data, "/scratch_isilon/groups/singlecell/shared/projects/CSF/data/All cells/all_cells_updated.rds")

### scCODA

In [None]:
# scCODA result table for the lv2_annot annotation
sccoda <- read_csv(glue("{data_dir}/All_cellslv2_annot_scCODA.csv"))

# rename covariate comparison (contrasts): each label reads
# "<level> v <Treatment reference>"
contrast_labels <- c(
  "C(disease, Treatment('Lymphoma'))T.Brain met" = "BM v L",
  "C(disease, Treatment('Lymphoma'))T.Glioblastoma" = "G v L",
  "C(disease, Treatment('Lymphoma'))T.Inflammatory" = "I v L",
  "C(disease, Treatment('Glioblastoma'))T.Brain met" = "BM v G",
  "C(disease, Treatment('Glioblastoma'))T.Inflammatory" = "I v G",
  "C(disease, Treatment('Glioblastoma'))T.Lymphoma" = "L v G",
  "C(disease, Treatment('Brain met'))T.Glioblastoma" = "G v BM",
  "C(disease, Treatment('Brain met'))T.Inflammatory" = "I v BM",
  "C(disease, Treatment('Brain met'))T.Lymphoma" = "L v BM",
  "C(disease, Treatment('Inflammatory'))T.Brain met" = "BM v I",
  "C(disease, Treatment('Inflammatory'))T.Glioblastoma" = "G v I",
  "C(disease, Treatment('Inflammatory'))T.Lymphoma" = "L v I"
)
sccoda <- sccoda %>%
  mutate(comparison = recode(Covariate, !!!contrast_labels))

# filter out duplicate comparisons: each pair of diseases is listed in both
# directions, only one direction is kept (this is also the plotting order)
kept_contrasts <- c("BM v I", "L v I", "G v I", "BM v L", "G v L", "BM v G")
sccoda <- sccoda %>%
  filter(comparison %in% kept_contrasts)
sccoda$comparison <- factor(sccoda$comparison, levels = kept_contrasts)

# an effect is flagged "True" when its final parameter is different from 0
sccoda <- sccoda %>%
  mutate(significant = if_else(`Final Parameter` == 0, "False", "True"))

In [None]:
# dot plot of the scCODA results: color = log2 fold change, size = whether
# the effect was flagged as significant
sccoda %>%
  ggplot(aes(
    x = comparison,
    y = `Cell Type`,
    size = significant,
    color = as.numeric(`log2-fold change`)
  )) +
  geom_point() +
  theme_classic() +
  RotatedAxis() +
  scale_color_gradient2(
    low = "#3B99B1FF",
    mid = "#EACB2BFF",
    high = "#F5191CFF"
  ) +
  # sizes are named so that "True" is always the large dot, even when only
  # one of the two values is present in the table
  scale_size_manual(values = c("False" = 1, "True" = 8)) +
  labs(title = "scCODA results", x = "Comparison", color = "Log2 FC", size = "Significant") +
  theme(text = element_text(size = 18))

***

# Myeloid vs Lymphoid only

In [None]:
# finer disease label: the listed patients get a more specific "Brain met"
# label, everyone else keeps the value of `disease`
brain_met_groups <- list(
  "Brain met NSCLC" = c("P03", "P08"),
  "Brain met CM" = c("P09"),
  "Brain met BC" = c("P12", "P16")
)

data$disease_2 <- data$disease
for (group_label in names(brain_met_groups)) {
  in_group <- data$patient %in% brain_met_groups[[group_label]]
  data$disease_2[in_group] <- group_label
}

# from here on `pal_disease` holds six colors instead of four
pal_disease <- c(
  "#e6194b", "#630119", "#ffcfda",
  "#3cb44b", "#ffe119", "#4363d8"
)

In [None]:
# per-sample percentage of each i_annot label, with the disease_2 label of
# the sample attached
cd45_tab <- as.data.frame(
  data@meta.data[, c("patient_sample", "i_annot", "disease_2")]
)

# counts for every sample x label combination (zero counts included)
freq_table <- as.data.frame(
  table(cd45_tab[, c("patient_sample", "i_annot")])
)
sample_disease <- unique(data@meta.data[, c("patient_sample", "disease_2")])
freq_table <- merge(freq_table, sample_disease)

# total cells per sample, then percentage rounded to two decimals
totals <- freq_table %>%
  group_by(patient_sample) %>%
  summarise(total = sum(Freq))
freq_table <- merge(freq_table, totals, all.x = TRUE) %>%
  mutate(prop = round((Freq / total) * 100, digits = 2))

# samples ranked by decreasing T cell percentage; this ranking sets the order
# of the samples on the x axis
t_cell_ranking <- freq_table %>%
  filter(i_annot == "T cells") %>%
  arrange(desc(prop))

pat <- t_cell_ranking %>%
  pull(patient_sample)
freq_table$patient_sample <- factor(freq_table$patient_sample, levels = pat)

# disease_2 labels in the same sample order
dis <- t_cell_ranking %>%
  pull(disease_2) %>%
  as.character()

In [None]:
# bar at the top: one tile per sample, colored by disease_2
p1 <- ggplot(freq_table, aes(y = -1, fill = disease_2, x = patient_sample)) +
  geom_tile() +
  scale_fill_manual(values = pal_disease) +
  theme_void() +
  labs(fill = "disease_2")

# actual barplot: stacked percentages per sample, dashed line at 50 %
p2 <- ggplot(freq_table, aes(fill = i_annot, y = prop, x = patient_sample)) +
  geom_bar(position = "stack", stat = "identity") +
  theme_classic() +
  geom_hline(yintercept = 50, linetype = "dashed") +
  scale_fill_manual(values = pal_i_annot) +
  RotatedAxis() +
  # axis label spelling fixed (was "Percetage")
  labs(x = "Patient", y = "Percentage", fill = "")

# patchwork: `ncol = 1` stacks the two panels so that `heights` (one value
# per row) applies and the disease bar sits above the matching samples
(p1 + p2) +
  plot_layout(ncol = 1, heights = c(1, 10), guides = "collect") &
  theme(
    legend.justification = "left",
    text = element_text(size = 20),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )

In [None]:
# re-rank the samples, this time by decreasing percentage of "Non-immune"
# cells; the ranking sets the order of the samples on the x axis
non_immune_ranking <- freq_table %>%
  filter(i_annot == "Non-immune") %>%
  arrange(desc(prop))

pat <- non_immune_ranking %>%
  pull(patient_sample)
freq_table$patient_sample <- factor(freq_table$patient_sample, levels = pat)

# disease_2 labels in the same sample order
dis <- non_immune_ranking %>%
  pull(disease_2) %>%
  as.character()

In [None]:
# bar at the top: one tile per sample, colored by disease_2
p1 <- ggplot(freq_table, aes(y = -1, fill = disease_2, x = patient_sample)) +
  geom_tile() +
  scale_fill_manual(values = pal_disease) +
  theme_void() +
  labs(fill = "disease_2")

# actual barplot: stacked percentages per sample, dashed line at 50 %
p2 <- ggplot(freq_table, aes(fill = i_annot, y = prop, x = patient_sample)) +
  geom_bar(position = "stack", stat = "identity") +
  theme_classic() +
  geom_hline(yintercept = 50, linetype = "dashed") +
  scale_fill_manual(values = pal_i_annot) +
  RotatedAxis() +
  # axis label spelling fixed (was "Percetage")
  labs(x = "Patient", y = "Percentage", fill = "")

# patchwork: `ncol = 1` stacks the two panels so that `heights` (one value
# per row) applies and the disease bar sits above the matching samples
(p1 + p2) +
  plot_layout(ncol = 1, heights = c(1, 10), guides = "collect") &
  theme(
    legend.justification = "left",
    text = element_text(size = 20),
    axis.text.x = element_blank(),
    axis.ticks.x = element_blank()
  )