In [None]:
### This script runs Wilcoxon rank-sum tests and hierarchical clustering on the Tangram deconvolution results to identify cell types that are significantly more abundant in each cluster than in the remaining clusters.
#### Load necessary packages
library(Seurat)
library(compositions)
library(tidyverse)
library(clustree)
library(patchwork)
library(uwot)
library(scran)
library(cluster)
library(ggrastr)
library(cowplot)
library(ggplot2)
library(RColorBrewer)
library(conflicted) # to be loaded in case of a conflict arises.

In [None]:
# Make the project directory the working directory so that the relative
# "Processed Data/..." and "Results/..." paths used below resolve against it.
# NOTE(review): this absolute path is machine-specific; adjust before running elsewhere.
setwd("/media/bio/Disk/Research Data/EBV/omicverse")

In [None]:
# Read the Tangram prediction table. The first CSV column becomes the row
# names, and column names are kept verbatim (check.names = FALSE) so labels
# containing spaces or "+" are not rewritten.
tangram_ct_pred <- read.csv(
  file = "Processed Data/tangram_ct_pred.csv",
  row.names = 1,
  check.names = FALSE
)
# Show which columns were loaded
names(tangram_ct_pred)

In [None]:
# Proportions table used downstream: the predictions are round-tripped through
# a matrix and back to a data frame (row names are carried along).
deconv.mat <- tangram_ct_pred %>%
  as.matrix() %>%
  as.data.frame()
head(deconv.mat, n = 6L)

In [None]:
# Label every spot with the cell type that has the highest predicted
# probability. which.max() returns the first maximum, so on ties the cell type
# listed earliest in `celltype_cols` wins.
celltype_cols <- c(
  "Epithelial", "Tumor", "Fibroblast", "T", "NK", "B", "Plasma",
  "C1QC+ Macro", "SPP1+ Macro", "IL1B+ Macro", "IgM+ plasma-like",
  "Monocyte", "cDC", "pDC", "Neutrophil", "Mast"
)
tangram_ct_pred$cell_type <- apply(
  X = tangram_ct_pred[, celltype_cols],
  MARGIN = 1,
  FUN = function(spot_probs) {
    top_idx <- which.max(spot_probs)
    celltype_cols[top_idx]
  }
)
head(tangram_ct_pred)

In [None]:
# Load the scNiche metadata, keep only the sample and niche assignment, and
# move the row names into an explicit `row_id` column so the table can be
# joined by key later on.
scNiche_metadata <- "Processed Data/scNiche_metadata.csv" %>%
  read.csv(row.names = 1, check.names = FALSE) %>%
  select(sample_id, scNiche) %>%
  rownames_to_column(var = "row_id")

head(scNiche_metadata)

In [None]:
# Per-sample summary: for every (sample, niche, cell type) combination, take
# the median predicted proportion over the rows that belong to it.
cluster_summary_pat <- deconv.mat %>%
  rownames_to_column(var = "row_id") %>%
  # attach sample_id / scNiche to each row through the shared row_id key
  left_join(scNiche_metadata, by = "row_id") %>%
  # long format: one row per (row_id, cell type)
  pivot_longer(
    cols = -c(row_id, sample_id, scNiche),
    names_to = "cell_type",
    values_to = "ct_prop"
  ) %>%
  group_by(sample_id, scNiche, cell_type) %>%
  summarize(median_ct_prop = median(ct_prop, na.rm = TRUE))

head(cluster_summary_pat)

In [None]:
# Niche-level summary: median across samples of the per-sample medians.
# group_by() replaces any grouping already present on cluster_summary_pat.
cluster_summary <- cluster_summary_pat %>%
  group_by(scNiche, cell_type) %>%
  summarize(patient_median_ct_prop = median(median_ct_prop, na.rm = TRUE))

# Niche x cell-type matrix for hierarchical clustering; combinations that are
# absent are filled with 0.
cluster_summary_mat <- cluster_summary %>%
  pivot_wider(
    names_from = cell_type,
    values_from = patient_median_ct_prop,
    values_fill = list(patient_median_ct_prop = 0)
  ) %>%
  column_to_rownames(var = "scNiche") %>%
  as.matrix()

In [None]:
# ---- Summary Code: Cluster Enrichment Analysis Based on Wilcoxon Test (Cross-Sample) + Significance Marking + Combined Barplots ----

# Load necessary packages
library(dplyr)
library(tidyr)
library(ggplot2)
library(forcats)
library(patchwork)
library(cowplot)
library(RColorBrewer)
conflicts_prefer(dplyr::rename)
conflicts_prefer(stats::dist)

# Assume cluster_summary_pat contains sample_id, scNiche, cell_type, median_ct_prop

# 1. Wilcoxon test (cluster vs. rest)
#
# For each distinct value of `prop_data$scNiche`, compare the `median_ct_prop`
# values of that niche ("target") against those of all other niches ("rest")
# with a one-sided Wilcoxon rank-sum test (alternative: target greater).
#
# prop_data: data frame with columns `scNiche` and `median_ct_prop`.
# Returns:   one row per niche, with a `scNiche` column followed by the
#            columns produced by broom::tidy() for the test.
run_wilcox_up <- function(prop_data) {
  niches <- set_names(unique(prop_data[["scNiche"]]))

  test_one_niche <- function(target_niche) {
    # "target" is the first factor level, so "greater" means target > rest
    labelled <- mutate(
      prop_data,
      test_group = factor(
        ifelse(scNiche == target_niche, "target", "rest"),
        levels = c("target", "rest")
      )
    )
    fit <- wilcox.test(
      median_ct_prop ~ test_group,
      data = labelled,
      alternative = "greater"
    )
    broom::tidy(fit)
  }

  per_niche <- map(niches, test_one_niche)
  bind_rows(per_niche, .id = "scNiche")
}

# Run the niche-vs-rest test separately for every cell type, correct the
# p-values for multiple testing, flag significant enrichments, and save the
# table.
wilcoxon_res <- cluster_summary_pat %>%
  group_by(cell_type) %>%
  nest() %>%
  mutate(wres = map(data, run_wilcox_up)) %>%
  select(cell_type, wres) %>%
  unnest(cols = wres) %>%
  # nest() leaves the table grouped by cell_type; drop the grouping so the FDR
  # correction covers all niche x cell-type tests together instead of being
  # computed separately within each cell type.
  ungroup() %>%
  mutate(
    p_corr = p.adjust(p.value, method = "fdr"),
    # significance is called on the corrected p-value, not the raw one
    significant = ifelse(p_corr <= 0.05, "*", "")
  )
write.csv(wilcoxon_res, "Processed Data/CellType_Enrichment_scNiche_Wilcoxon_Res.csv")

# 2. Aggregate median values of cluster × cell_type
#    (median across samples of the per-sample medians; result is ungrouped)
cluster_summary <- summarize(
  group_by(cluster_summary_pat, scNiche, cell_type),
  patient_median_ct_prop = median(median_ct_prop, na.rm = TRUE),
  .groups = "drop"
)

# 3. Cluster ordering
#    Niche x cell-type matrix the clustering runs on; absent combinations are 0.
cluster_summary_mat <- cluster_summary %>%
  pivot_wider(
    values_from = patient_median_ct_prop,
    names_from = cell_type,
    values_fill = 0
  ) %>%
  column_to_rownames(var = "scNiche") %>%
  as.matrix()

# Dendrogram leaf order of the niches (matrix rows) and of the cell types
# (matrix columns). with() evaluates `labels[order]` inside the hclust result,
# so each clustering is computed once.
cluster_order <- with(hclust(dist(cluster_summary_mat)), labels[order])
ct_order <- with(hclust(dist(t(cluster_summary_mat))), labels[order])

# 4. Prepare data for plotting (with significance)
#    - attach the Wilcoxon results to every niche x cell-type median
#    - order both axes by the hierarchical clustering
#    - z-score the medians within each cell type for the heatmap fill
ggplot_data <- cluster_summary %>%
  left_join(wilcoxon_res, by = c("scNiche", "cell_type")) %>%
  mutate(
    cell_type = factor(cell_type, levels = ct_order),
    scNiche = factor(scNiche, levels = cluster_order)
  ) %>%
  group_by(cell_type) %>%
  # scale() returns a one-column matrix; flatten it back to a plain vector
  mutate(scaled_pat_median = as.vector(scale(patient_median_ct_prop))) %>%
  ungroup()

# 5. Create color mapping for cluster rows
#    Named character vector: niche label -> hex fill colour.
cluster_color_map <- setNames(
  c(
    "#a6cee3", "#1f78b4", "#33a02c",
    "#fb9a99", "#fdbf6f", "#ff7f00",
    "#6a3d9a", "#ffff99", "#b15928"
  ),
  c(
    "Niche1", "Niche2", "Niche3",
    "Niche4", "Niche5", "Niche6",
    "Niche7", "Niche8", "Niche9"
  )
)

# 6. Row name color blocks + heatmap body (row name color blocks on the left)
#    One tile per niche, in clustering order, filled with the niche colour and
#    labelled with the niche name.
annotation_df <- data.frame(
  scNiche = factor(cluster_order, levels = cluster_order),
  fill = cluster_order
)
color_bar <- ggplot(
  data = annotation_df,
  mapping = aes(x = 1, y = scNiche, fill = fill)
) +
  geom_tile() +
  geom_text(mapping = aes(label = scNiche), size = 4, color = "black") +
  scale_fill_manual(values = cluster_color_map) +
  theme_void() +
  theme(legend.position = "none")

# Heatmap body: niches (rows) x cell types (columns), filled by the scaled
# niche-level median and overlaid with the significance marker.
# NOTE(review): no layer here maps a colour aesthetic, so scale_color_manual()
# appears to have no visible effect.
mean_ct_prop_plt <- ggplot(
  data = ggplot_data,
  mapping = aes(x = cell_type, y = scNiche, fill = scaled_pat_median)
) +
  geom_tile(color = "black") +
  geom_text(mapping = aes(label = significant), size = 4, na.rm = TRUE) +
  scale_fill_gradient2() +
  scale_color_manual(values = cluster_color_map) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    # row labels are provided by the colour bar placed to the left
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 12, angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "top",
    plot.margin = margin(0, 0, 0, 0)
  )

# 7. Right-side barplot: cluster counts (synchronized colors, ensure consistent direction)
#    Count the metadata rows per niche, convert to fractions, and order the
#    niches like the heatmap rows.
cluster_counts <- scNiche_metadata %>%
  group_by(scNiche) %>%
  summarize(nspots = dplyr::n(), .groups = "drop") %>%
  mutate(
    prop_spots = nspots / sum(nspots),
    scNiche = factor(scNiche, levels = cluster_order)
  )

# NOTE(review): the x axis is limited to [0, 0.35]; a niche whose fraction
# exceeds that would fall outside the scale limits.
bar_cluster <- ggplot(
  data = cluster_counts,
  mapping = aes(x = prop_spots, y = scNiche, fill = scNiche)
) +
  geom_bar(stat = "identity", orientation = "y") +
  scale_fill_manual(values = cluster_color_map) +
  scale_x_continuous(limits = c(0, 0.35), breaks = c(0.1, 0.2, 0.3)) +
  labs(x = "Fraction of Spots", y = "") +
  theme_cowplot() +
  theme(
    axis.text.x = element_text(size = 10),
    axis.text.y = element_blank(),
    legend.position = "none",
    plot.margin = margin(0, 0, 0, 0)
  )

# 8. Combine plots: left color blocks + row names + heatmap + right barplot
#    Three panels side by side, horizontally aligned.
final_plot <- plot_grid(
  color_bar, mean_ct_prop_plt, bar_cluster,
  ncol = 3,
  align = "h",
  rel_widths = c(0.08, 0.5, 0.42)
)

# Write the combined figure to PDF (cairo device), then display it
ggsave(
  filename = "Results/10.NPC_ST_Analysis/CellType_Enrichment_Spatial_scNiche.pdf",
  plot = final_plot,
  device = cairo_pdf,
  width = 12,
  height = 5
)
print(final_plot)

In [None]:
# Fix the cell-type order (and therefore the facet order in later plots) by
# turning `cell_type` into a factor with explicit levels.
cluster_summary_pat$cell_type <- factor(
  x = cluster_summary_pat$cell_type,
  levels = c(
    "Epithelial", "Tumor", "Fibroblast", "T", "NK", "B", "Plasma",
    "C1QC+ Macro", "SPP1+ Macro", "IL1B+ Macro", "IgM+ plasma-like",
    "Monocyte", "cDC", "pDC", "Neutrophil", "Mast"
  )
)

In [None]:
# Box plots of the per-sample median proportions: one panel per cell type, one
# box per niche. Fill colours come from `cluster_color_map` so they match the
# heatmap figure. (The previously created, never-used `colors` palette, which
# also shadowed grDevices::colors(), has been removed.)
bboxplot_median_ct_prop <- cluster_summary_pat %>%
  ggplot(aes(x = scNiche, y = median_ct_prop, fill = scNiche)) +
  geom_boxplot(outlier.size = 1.5, outlier.colour = "red", width = 0.6, alpha = 0.7) +
  scale_fill_manual(values = cluster_color_map) +  # niche colours
  theme_classic() +
  theme(axis.text.x = element_blank(),   # Remove x-axis labels
        axis.ticks.x = element_blank(),  # Remove x-axis ticks
        axis.title.x = element_blank(),  # Remove x-axis title
        axis.title.y = element_text(size = 12), # Adjust y-axis title size
        strip.text = element_text(size = 8),
        plot.title = element_text(size = 11, face = "bold"),
        legend.text = element_text(size = 11),  # Increase legend text size
        legend.title = element_text(size = 12)) +  # Increase legend title size
  labs(title = "",
       x = "",
       y = "Median Cell Type Proportion") +
  # each cell type gets its own y scale, since proportions differ widely
  facet_wrap(. ~ cell_type, ncol = 8, scales = "free_y")

# Display the plot
bboxplot_median_ct_prop
# Save the plot as a PDF
pdf("Results/10.NPC_ST_Analysis/bboxplot_median_ct_prop.pdf", width = 12, height = 3.5)
print(bboxplot_median_ct_prop)
dev.off()

In [None]:
# Record the R version and the attached/loaded package versions for reproducibility
sessionInfo()