# Longitudinal Clonal Dynamics Assessment

### Set Up Env

In [None]:
# Project bootstrap: switch to the project root, set notebook display limits,
# silence warnings, then load the shared configuration script.
setwd("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR")
options(
    repr.matrix.max.rows = 100,
    repr.matrix.max.cols = 100,
    warn = -1  # a negative value suppresses all warnings for the session
)
source("code/helper/Config.R", echo = FALSE)

In [None]:
# Load extra libraries
library(vegan)
library(ggExtra)

In [None]:
# Import plotting helper functions
# (sourced by absolute path, so this call does not depend on the working directory)
source("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR/code/helper/Plotting_Functions.R", echo = FALSE)

In [None]:
# Define figures path
# (output directory used by the ggsave() calls in this notebook)
fig_dir <- "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR/out/figs/TCR_Fig_Jan"

### Load Tumor 10x Processed TCR Data with matched GEX Profiles

In [None]:
# Load the wide-format table.
# NOTE(review): root_dir is not defined in this notebook; presumably it is set
# by code/helper/Config.R - confirm.
data <- file.path(root_dir, "out", "data", "SERP_TCR-GEX_wide_11-2025_v2.qs") %>%
    qread()

In [None]:
# Load the long-format table.
# NOTE(review): root_dir is not defined in this notebook; presumably it is set
# by code/helper/Config.R - confirm.
data_long <- file.path(root_dir, "out", "data", "SERP_TCR-GEX_11-2025_v2.qs") %>%
    qread()

### Prepare data

In [None]:
# Missing normalized clone sizes become 0 at each time point (T0, T1, EOT)
data <- mutate(
    data,
    across(
        c(norm_cloneSize_T0, norm_cloneSize_T1, norm_cloneSize_EOT),
        function(x) replace_na(x, 0)
    )
)

In [None]:
# Consensus lv1 label per clonotype:
#   Pre-existing with matching T0/T1 labels -> the shared label (taken from T1)
#   De Novo                                 -> T1 label
#   Lost                                    -> T0 label
#   anything else                           -> "not_matched"
data <- mutate(
    data,
    lv1 = ifelse(
        (presence_status == "Pre-existing") & (lv1_T0 == lv1_T1),
        lv1_T1,
        ifelse(
            presence_status == "De Novo",
            lv1_T1,
            ifelse(presence_status == "Lost", lv1_T0, "not_matched")
        )
    )
)

In [None]:
# Consensus CD4/CD8 label per clonotype:
#   Pre-existing with matching T0/T1 labels -> the shared label (taken from T1)
#   De Novo                                 -> T1 label
#   Lost                                    -> T0 label
#   anything else                           -> "not_matched"
data <- mutate(
    data,
    CD4_CD8_assignment = ifelse(
        (presence_status == "Pre-existing") & (CD4_CD8_assignment_T0 == CD4_CD8_assignment_T1),
        CD4_CD8_assignment_T1,
        ifelse(
            presence_status == "De Novo",
            CD4_CD8_assignment_T1,
            ifelse(presence_status == "Lost", CD4_CD8_assignment_T0, "not_matched")
        )
    )
)

In [None]:
# Remove the outlier clonotype, then recompute the per-patient clone-size
# proportions so they are relative to the remaining clonotypes only.
data <- data %>%
    filter(clonotype_id != "TRBV2_TRBJ1-1_GCCAGGGGGGATGATCCGAAGGGGCGGGGGACTGAAGCTTTC") %>%
    group_by(patient) %>%
    mutate(
        # Recomputing from the raw counts yields NA/NaN wherever a raw count is
        # missing (or a patient total is 0); reset those to 0 so the earlier
        # NA -> 0 substitution on the normalized columns stays in force.
        norm_cloneSize_T0 = replace_na(cloneSize_T0 / sum(cloneSize_T0, na.rm = TRUE), 0),
        norm_cloneSize_T1 = replace_na(cloneSize_T1 / sum(cloneSize_T1, na.rm = TRUE), 0),
        norm_cloneSize_EOT = replace_na(cloneSize_EOT / sum(cloneSize_EOT, na.rm = TRUE), 0)
    ) %>%
    # Drop the per-patient grouping so it does not leak into later steps
    ungroup()
dim(data)

In [None]:
# Keep the selected patients, lung/liver metastases only, and clonotypes with
# a CD4 or CD8 consensus assignment
patients_keep <- c("P01", "P02", "P03", "P10", "P14", "P17", "P20", "P26", "P29", "P31", "P33", "P34", "P35")
data_filt <- filter(
    data,
    patient %in% patients_keep,
    met_loc %in% c("Lung", "Liver"),
    CD4_CD8_assignment %in% c("CD4", "CD8")
)
dim(data_filt)

In [None]:
# Row-wise average of the T0 and T1 proportions (missing values ignored)
data_filt <- mutate(
    data_filt,
    mean_norm_cloneSize = rowMeans(
        across(c(norm_cloneSize_T0, norm_cloneSize_T1)),
        na.rm = TRUE
    )
)

In [None]:
# Facet label: "<tissue> <lineage>", joined by paste()'s default single space
data_filt <- mutate(
    data_filt,
    T_lineage_tissue = paste(met_loc, CD4_CD8_assignment)
)

## Clonal Scatter across time points and metastatic sites with clonotype densities

In [None]:
# Clonal scatter: per-clonotype proportion at T1 (x) against T0 (y) on a log10
# scale with a 1.1e-4 pseudocount, one facet per tissue + lineage. A filled 2D
# density of the Pre-existing clonotypes and contour lines are drawn on top.
options(repr.plot.width = 7, repr.plot.height = 7)

p <- ggplot(data_filt, aes(x = log10(norm_cloneSize_T1+1.1e-4), y = log10(norm_cloneSize_T0+1.1e-4), size = log(mean_norm_cloneSize+1e-4))) +
            ggrastr::rasterize(geom_point(size = 1.5, alpha = 0.5)) +
            # after_stat() replaces the deprecated ..level.. notation; the former
            # `geom = "polygon"` argument was not a parameter of this layer
            geom_density_2d_filled(data = data_filt %>% filter(presence_status == "Pre-existing"),
                                   aes(fill = after_stat(level)), alpha = 0.8, contour_var = "ndensity", expand = TRUE) +
            scale_fill_viridis_d(option = "magma") +
            geom_density_2d(linewidth = 0.25, colour = "black") +
            labs(
                x = expression(Log10("Proportions C02")),
                y = expression(Log10("Proportions SCR"))
            ) +
            theme_bw(base_size = 15) +
            guides(size = "none", fill = "none") +
            theme(
                legend.position = "bottom",
                strip.text = element_text(face = "bold"),
                panel.grid = element_blank(),
                panel.border = element_rect(color = "black", linewidth = 1.5),
                axis.title = element_text(hjust = 0.5),
                plot.title = element_text(hjust = 0.5, face = "bold"),
                axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
                axis.title.x = element_text(margin = margin(t = 10, r = 0, b = 0, l = 0))
            ) +
            #xlim(log10(1e-8),0) + ylim(log10(1e-8),0) +
            # Zoom without dropping data from the density estimates
            coord_cartesian(
                xlim = c(log10(1e-4), -1),
                ylim = c(log10(1e-4), -1.2)
            ) +
            scale_x_continuous(expand = expansion(mult = 0)) +
            scale_y_continuous(expand = expansion(mult = 0)) +
            # Identity line: equal proportion at both time points
            geom_abline(slope = 1, intercept = 0, color = "black") +
            geom_hline(yintercept = log10(0.00014), color = "lightgrey", linewidth = 0.5) +
            geom_vline(xintercept = log10(0.00014), color = "lightgrey", linewidth = 0.5) +
            facet_wrap(vars(T_lineage_tissue))
print(p)
# Save the plot object explicitly rather than whatever last_plot() returns
ggsave(filename = file.path(fig_dir, "ClonalScatter_2D-Densities_Wrap.pdf"), plot = p, dpi = 300, width = 7, height = 7)

## Baseline diversity comparison between CD4s and CD8s stratified by metastatic site

In [None]:
# Baseline (T0) Shannon entropy per patient x metastatic site x CD4/CD8 lineage
baseline_metrics <- data %>%
    filter(
        patient %in% patients_keep,
        met_loc %in% c("Lung", "Liver")
        ) %>%
    group_by(patient, met_loc, CD4_CD8_assignment) %>%
    summarise(
        Shannon_T0 = diversity(norm_cloneSize_T0, index = "shannon", base = exp(1)),
        # Return an ungrouped table instead of relying on summarise()'s
        # implicit "drop the last grouping level" default
        .groups = "drop"
        ) %>%
    filter(CD4_CD8_assignment %in% c("CD4", "CD8"))
head(baseline_metrics)

# Convert metrics to long format for plotting
# ("Shannon_T0" is split into a `Shannon` value column and a `timepoint` column)
baseline_plot <- baseline_metrics %>%
    pivot_longer(
        cols = c(Shannon_T0),
        names_to = c(".value", "timepoint"),
        names_pattern = "(.*)_(T\\d)"
    )
head(baseline_plot)

# Compute statistical tests
# Paired tests (CD4 vs CD8 within a tissue) match values by position, so they
# rely on every patient contributing both a CD4 and a CD8 row in that tissue.
wilcox_test_liver <- wilcox.test(
    x = baseline_plot$Shannon[baseline_plot$met_loc == "Liver" & baseline_plot$CD4_CD8_assignment == "CD4"],
    y = baseline_plot$Shannon[baseline_plot$met_loc == "Liver" & baseline_plot$CD4_CD8_assignment == "CD8"],
    paired = TRUE
)
p_val_liver <- signif(wilcox_test_liver$p.value, 3)
print(p_val_liver)

wilcox_test_lung <- wilcox.test(
    x = baseline_plot$Shannon[baseline_plot$met_loc == "Lung" & baseline_plot$CD4_CD8_assignment == "CD4"],
    y = baseline_plot$Shannon[baseline_plot$met_loc == "Lung" & baseline_plot$CD4_CD8_assignment == "CD8"],
    paired = TRUE
)
p_val_lung <- signif(wilcox_test_lung$p.value, 3)
print(p_val_lung)

# Unpaired tests (lung vs liver within a lineage)
wilcox_test_cd4 <- wilcox.test(
    x = baseline_plot$Shannon[baseline_plot$met_loc == "Lung" & baseline_plot$CD4_CD8_assignment == "CD4"],
    y = baseline_plot$Shannon[baseline_plot$met_loc == "Liver" & baseline_plot$CD4_CD8_assignment == "CD4"],
    paired = FALSE
)
p_val_cd4 <- signif(wilcox_test_cd4$p.value, 3)
print(p_val_cd4)

wilcox_test_cd8 <- wilcox.test(
    x = baseline_plot$Shannon[baseline_plot$met_loc == "Lung" & baseline_plot$CD4_CD8_assignment == "CD8"],
    y = baseline_plot$Shannon[baseline_plot$met_loc == "Liver" & baseline_plot$CD4_CD8_assignment == "CD8"],
    paired = FALSE
)
p_val_cd8 <- signif(wilcox_test_cd8$p.value, 3)
print(p_val_cd8)

# Plot
# (line widths use `linewidth`, matching the scatter plot earlier in the notebook)
options(repr.plot.width = 5, repr.plot.height = 4)
ggplot(baseline_plot, aes(x = met_loc, y = Shannon, fill = CD4_CD8_assignment)) +
    geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, linewidth = 1) +
    geom_dotplot(binaxis='y', stackdir='center', position=position_dodge(1)) +
    scale_fill_manual(
         values = c("CD4" = "#298c8c", "CD8" = "#f1a226")
    ) +
    theme_linedraw(base_size = 15) +
    theme(
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
        plot.title = element_text(hjust=0.5)
    ) +
    labs(title = "Baseline (SCR)", x = "Tissue", y = "Shannon Entropy", fill = "T Lineage") +
    #geom_text(aes(x = 1, y = max(Shannon) + 0.2,  fontface = "plain",
    #              label = paste0("p = ", p_val_liver)), inherit.aes = FALSE, size = 5) +
    #geom_text(aes(x = 2, y = max(Shannon) + 0.2, fontface = "plain",
    #              label = paste0("p = ", p_val_lung)), inherit.aes = FALSE, size = 5)
    ylim(4, 9)
ggsave(filename = file.path(fig_dir, "Boxplot_Baseline_Entropy_CD4vsCD8.pdf"), plot = last_plot(), dpi = 300, width = 5, height = 4)

In [None]:
# One row per patient/tissue with the CD4 and CD8 baseline entropies side by
# side, plus their difference (CD4 minus CD8)
baseline_metrics_wide <- mutate(
    pivot_wider(
        baseline_metrics,
        names_from = CD4_CD8_assignment,
        values_from = Shannon_T0,
        names_prefix = "Shannon_T0_"
    ),
    delta_Shannon_T0 = Shannon_T0_CD4 - Shannon_T0_CD8
)
baseline_metrics_wide

In [None]:
library(boot)

# Summarise the per-patient entropy difference stored in `delta_col` for one
# tissue: median, 95% percentile-bootstrap CI of the median, and a precomputed
# Wilcoxon p-value.
#
# Arguments:
#   df        Wide table with a `met_loc` column and the `delta_col` column.
#   tissue    Value of `met_loc` to keep (e.g. "Liver").
#   p_val     Precomputed p-value, copied into the output unchanged.
#   timepoint Label written to the `Timepoint` output column. It is not used
#             for filtering: the wide table has no timepoint column.
#   delta_col Name of the column holding the differences.
# Returns: a one-row tibble (Tissue, Timepoint, Median_Delta, CI_lower,
#   CI_upper, Wilcox_p).
compute_delta_stats <- function(df, tissue, p_val, timepoint, delta_col = "delta_Shannon_T0") {

    # Subset delta values for the requested tissue
    delta_vals <- df %>%
        filter(met_loc == tissue) %>%
        pull({{delta_col}})

    if (length(delta_vals) == 0) {
        stop("No rows found for tissue: ", tissue, call. = FALSE)
    }

    # Median
    med <- median(delta_vals)

    # Bootstrap CI (fixed seed so the interval is reproducible)
    median_fun <- function(data, indices) median(data[indices])
    set.seed(123)
    boot_obj <- boot(delta_vals, statistic = median_fun, R = 10000)
    ci <- boot.ci(boot_obj, type = "perc")$percent[4:5]  # 95% CI lower/upper

    # Return as tibble
    tibble(
        Tissue = tissue,
        Timepoint = timepoint,
        Median_Delta = med,
        CI_lower = ci[1],
        CI_upper = ci[2],
        Wilcox_p = p_val
    )
}

# Compute stats for liver and lung using the precomputed p-values.
# Arguments are named so the delta column name is not mistaken for the
# time point label.
stats_liver <- compute_delta_stats(baseline_metrics_wide, "Liver", p_val_liver, timepoint = "T0", delta_col = "delta_Shannon_T0")
stats_lung  <- compute_delta_stats(baseline_metrics_wide, "Lung",  p_val_lung, timepoint = "T0", delta_col = "delta_Shannon_T0")

# Combine into a single table
delta_stats_table <- bind_rows(stats_liver, stats_lung)

# Print table
print(delta_stats_table)

In [None]:
# Preview the first rows of the baseline metrics table
head(baseline_metrics, n = 6L)

In [None]:
# Unpaired effect sizes: lung-minus-liver difference in median baseline
# Shannon entropy, computed separately for CD4 and CD8
shannon_cd4_liver <- pull(filter(baseline_metrics, CD4_CD8_assignment == "CD4", met_loc == "Liver"), Shannon_T0)
shannon_cd4_lung <- pull(filter(baseline_metrics, CD4_CD8_assignment == "CD4", met_loc == "Lung"), Shannon_T0)
shannon_cd8_liver <- pull(filter(baseline_metrics, CD4_CD8_assignment == "CD8", met_loc == "Liver"), Shannon_T0)
shannon_cd8_lung <- pull(filter(baseline_metrics, CD4_CD8_assignment == "CD8", met_loc == "Lung"), Shannon_T0)

delta_cd4 <- median(shannon_cd4_lung) - median(shannon_cd4_liver)
cat("Delta CD4:", delta_cd4)

delta_cd8 <- median(shannon_cd8_lung) - median(shannon_cd8_liver)
cat("\nDelta CD8:", delta_cd8)


# Percentile bootstrap of the median difference: each group is resampled
# independently with replacement (lung first, then liver, in every draw)
set.seed(123)
n_boot <- 10000

boot_delta_cd4 <- vapply(
    seq_len(n_boot),
    function(i) {
        lung_draw <- sample(shannon_cd4_lung, replace = TRUE)
        liver_draw <- sample(shannon_cd4_liver, replace = TRUE)
        median(lung_draw, na.rm = TRUE) - median(liver_draw, na.rm = TRUE)
    },
    numeric(1)
)

boot_delta_cd8 <- vapply(
    seq_len(n_boot),
    function(i) {
        lung_draw <- sample(shannon_cd8_lung, replace = TRUE)
        liver_draw <- sample(shannon_cd8_liver, replace = TRUE)
        median(lung_draw, na.rm = TRUE) - median(liver_draw, na.rm = TRUE)
    },
    numeric(1)
)

# The 2.5% and 97.5% quantiles bound the 95% interval
ci_cd4 <- quantile(boot_delta_cd4, probs = c(0.025, 0.975))
ci_cd8 <- quantile(boot_delta_cd8, probs = c(0.025, 0.975))

cat("\nCD4 95% CI:", ci_cd4, "\n")
cat("CD8 95% CI:", ci_cd8, "\n")

## Post-treatment diversity comparison between CD4s and CD8s stratified by metastatic site

In [None]:
# Post-treatment CD4 vs CD8 entropy comparison
# (the object names `baseline_metrics` / `baseline_plot` are reused here but
# hold T1 values from this point on)

# Shannon entropy at C02 (T1) per patient x metastatic site x CD4/CD8 lineage
baseline_metrics <- data %>%
    filter(
        patient %in% patients_keep,
        met_loc %in% c("Lung", "Liver")
        ) %>%
    group_by(patient, met_loc, CD4_CD8_assignment) %>%
    summarise(
        Shannon_T1 = diversity(norm_cloneSize_T1, index = "shannon", base = exp(1)),
        # Return an ungrouped table instead of relying on summarise()'s
        # implicit "drop the last grouping level" default
        .groups = "drop"
        ) %>%
    filter(CD4_CD8_assignment %in% c("CD4", "CD8"))
head(baseline_metrics)

# Convert metrics to long format for plotting
# ("Shannon_T1" is split into a `Shannon` value column and a `timepoint` column)
baseline_plot <- baseline_metrics %>%
    pivot_longer(
        cols = c(Shannon_T1),
        names_to = c(".value", "timepoint"),
        names_pattern = "(.*)_(T\\d)"
    )
head(baseline_plot)

# Compute statistical tests
# Paired tests (CD4 vs CD8 within a tissue) match values by position, so they
# rely on every patient contributing both a CD4 and a CD8 row in that tissue.
wilcox_test_liver <- wilcox.test(
    x = baseline_plot$Shannon[baseline_plot$met_loc == "Liver" & baseline_plot$CD4_CD8_assignment == "CD4"],
    y = baseline_plot$Shannon[baseline_plot$met_loc == "Liver" & baseline_plot$CD4_CD8_assignment == "CD8"],
    paired = TRUE
)
p_val_liver <- signif(wilcox_test_liver$p.value, 3)
print(p_val_liver)

wilcox_test_lung <- wilcox.test(
    x = baseline_plot$Shannon[baseline_plot$met_loc == "Lung" & baseline_plot$CD4_CD8_assignment == "CD4"],
    y = baseline_plot$Shannon[baseline_plot$met_loc == "Lung" & baseline_plot$CD4_CD8_assignment == "CD8"],
    paired = TRUE
)
p_val_lung <- signif(wilcox_test_lung$p.value, 3)
print(p_val_lung)

# Unpaired tests (lung vs liver within a lineage)
wilcox_test_cd4 <- wilcox.test(
    x = baseline_plot$Shannon[baseline_plot$met_loc == "Lung" & baseline_plot$CD4_CD8_assignment == "CD4"],
    y = baseline_plot$Shannon[baseline_plot$met_loc == "Liver" & baseline_plot$CD4_CD8_assignment == "CD4"],
    paired = FALSE
)
p_val_cd4 <- signif(wilcox_test_cd4$p.value, 3)
print(p_val_cd4)

wilcox_test_cd8 <- wilcox.test(
    x = baseline_plot$Shannon[baseline_plot$met_loc == "Lung" & baseline_plot$CD4_CD8_assignment == "CD8"],
    y = baseline_plot$Shannon[baseline_plot$met_loc == "Liver" & baseline_plot$CD4_CD8_assignment == "CD8"],
    paired = FALSE
)
p_val_cd8 <- signif(wilcox_test_cd8$p.value, 3)
print(p_val_cd8)

# Plot
# (line widths use `linewidth`, matching the scatter plot earlier in the notebook)
options(repr.plot.width = 5, repr.plot.height = 4)
ggplot(baseline_plot, aes(x = met_loc, y = Shannon, fill = CD4_CD8_assignment)) +
    geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, linewidth = 1) +
    geom_dotplot(binaxis='y', stackdir='center', position=position_dodge(1)) +
    scale_fill_manual(
         values = c("CD4" = "#298c8c", "CD8" = "#f1a226")
    ) +
    theme_linedraw(base_size = 15) +
    theme(
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
        plot.title = element_text(hjust=0.5)
    ) +
    labs(title = "Post-ICI (C02)", x = "Tissue", y = "Shannon Entropy", fill = "T Lineage") +
    #geom_text(aes(x = 1, y = max(Shannon) + 0.2,  fontface = "plain",
    #              label = paste0("p = ", p_val_liver)), inherit.aes = FALSE, size = 5) +
    #geom_text(aes(x = 2, y = max(Shannon) + 0.2, fontface = "plain",
    #              label = paste0("p = ", p_val_lung)), inherit.aes = FALSE, size = 5)
    ylim(4, 9)
ggsave(filename = file.path(fig_dir, "Boxplot_Post-ICI_Entropy_CD4vsCD8.pdf"), plot = last_plot(), dpi = 300, width = 5, height = 4)

In [None]:
# One row per patient/tissue with the CD4 and CD8 T1 entropies side by side,
# plus their difference (CD4 minus CD8)
baseline_metrics_wide <- mutate(
    pivot_wider(
        baseline_metrics,
        names_from = CD4_CD8_assignment,
        values_from = Shannon_T1,
        names_prefix = "Shannon_T1_"
    ),
    delta_Shannon_T1 = Shannon_T1_CD4 - Shannon_T1_CD8
)
baseline_metrics_wide

In [None]:
library(boot)

# Summarise the per-patient entropy difference stored in `delta_col` for one
# tissue: median, 95% percentile-bootstrap CI of the median, and a precomputed
# Wilcoxon p-value.
#
# Arguments:
#   df        Wide table with a `met_loc` column and the `delta_col` column.
#   tissue    Value of `met_loc` to keep (e.g. "Liver").
#   p_val     Precomputed p-value, copied into the output unchanged.
#   timepoint Label written to the `Timepoint` output column. It is not used
#             for filtering: the wide table has no timepoint column.
#   delta_col Name of the column holding the differences.
# Returns: a one-row tibble (Tissue, Timepoint, Median_Delta, CI_lower,
#   CI_upper, Wilcox_p).
compute_delta_stats <- function(df, tissue, p_val, timepoint, delta_col = "delta_Shannon_T1") {

    # Subset delta values for the requested tissue
    delta_vals <- df %>%
        filter(met_loc == tissue) %>%
        pull({{delta_col}})

    if (length(delta_vals) == 0) {
        stop("No rows found for tissue: ", tissue, call. = FALSE)
    }

    # Median
    med <- median(delta_vals)

    # Bootstrap CI (fixed seed so the interval is reproducible)
    median_fun <- function(data, indices) median(data[indices])
    set.seed(123)
    boot_obj <- boot(delta_vals, statistic = median_fun, R = 10000)
    ci <- boot.ci(boot_obj, type = "perc")$percent[4:5]  # 95% CI lower/upper

    # Return as tibble
    tibble(
        Tissue = tissue,
        Timepoint = timepoint,
        Median_Delta = med,
        CI_lower = ci[1],
        CI_upper = ci[2],
        Wilcox_p = p_val
    )
}

# Compute stats for liver and lung using the precomputed p-values.
# Arguments are named so the delta column name is not mistaken for the
# time point label.
stats_liver <- compute_delta_stats(baseline_metrics_wide, "Liver", p_val_liver, timepoint = "T1", delta_col = "delta_Shannon_T1")
stats_lung  <- compute_delta_stats(baseline_metrics_wide, "Lung",  p_val_lung, timepoint = "T1", delta_col = "delta_Shannon_T1")

# Combine into a single table
delta_stats_table <- bind_rows(stats_liver, stats_lung)

# Print table
print(delta_stats_table)

## Longitudinal Shannon entropy assessment of T effector populations stratified by metastatic site

In [None]:
# Diversity metrics at T0 and T1 for every patient x tissue x lv1 population
library(vegan)
metrics_wide <- summarise(
    group_by(data_filt, patient, met_loc, lv1),

    # Richness as returned by vegan::specnumber()
    richness_T0 = specnumber(norm_cloneSize_T0),
    richness_T1 = specnumber(norm_cloneSize_T1),

    # Shannon entropy (natural log)
    Shannon_T0 = diversity(norm_cloneSize_T0, index = "shannon", base = exp(1)),
    Shannon_T1 = diversity(norm_cloneSize_T1, index = "shannon", base = exp(1)),

    # Clonality: 1 minus Shannon entropy scaled by log(richness)
    clonality_T0 = 1 - (Shannon_T0 / log(richness_T0)),
    clonality_T1 = 1 - (Shannon_T1 / log(richness_T1)),

    # Simpson index
    simpson_T0 = diversity(norm_cloneSize_T0, index = "simpson"),
    simpson_T1 = diversity(norm_cloneSize_T1, index = "simpson"),

    # Change in Shannon entropy, T1 minus T0
    delta_Shannon = Shannon_T1 - Shannon_T0
)
head(metrics_wide)

In [None]:
# Long format for plotting: each "<metric>_<Tn>" column is split into a
# `<metric>` value column and a `timepoint` column
metrics_plot <- pivot_longer(
    select(metrics_wide, -richness_T0, -richness_T1),
    cols = all_of(c(
        "Shannon_T0", "Shannon_T1",
        "clonality_T0", "clonality_T1",
        "simpson_T0", "simpson_T1"
    )),
    names_to = c(".value", "timepoint"),
    names_pattern = "(.*)_(T\\d)"
)
head(metrics_plot)

### CD4s

In [None]:
# Wilcoxon tests on Shannon entropy for effector CD4 cells (lv1 == "CD4"):
# paired T0 vs T1 within each tissue, unpaired liver vs lung at each time point

# Filter liver values
metrics_wide_liver <- metrics_wide %>% filter(met_loc == "Liver", lv1 == "CD4")

# (Liver) Paired Wilcoxon test
wilcox_test_liver <- wilcox.test(metrics_wide_liver$Shannon_T0, metrics_wide_liver$Shannon_T1, paired = TRUE)
p_val_liver <- signif(wilcox_test_liver$p.value, 3)
print(p_val_liver)

# Filter lung values
metrics_wide_lung <- metrics_wide %>% filter(met_loc == "Lung", lv1 == "CD4")

# (Lung) Paired Wilcoxon test
wilcox_test_lung <- wilcox.test(metrics_wide_lung$Shannon_T0, metrics_wide_lung$Shannon_T1, paired = TRUE)
p_val_lung <- signif(wilcox_test_lung$p.value, 3)
print(p_val_lung)

# (T0) Unpaired Wilcoxon test: liver vs lung at T0
wilcox_test_t0 <- wilcox.test(
    metrics_wide_liver$Shannon_T0,
    metrics_wide_lung$Shannon_T0,
    paired = FALSE
)
p_val_t0 <- signif(wilcox_test_t0$p.value, 3)
print(p_val_t0)

# (T1) Unpaired Wilcoxon test: liver vs lung at T1
# (both sides must use Shannon_T1; the lung side previously read Shannon_T0)
wilcox_test_t1 <- wilcox.test(
    metrics_wide_liver$Shannon_T1,
    metrics_wide_lung$Shannon_T1,
    paired = FALSE
)
p_val_t1 <- signif(wilcox_test_t1$p.value, 3)
print(p_val_t1)

In [None]:
# Plot Shannon diversity comparison for effector CD4s (T0 vs T1 per tissue)
# Line widths use `linewidth`, matching the scatter plot earlier in the notebook.
# NOTE(review): the sample sizes in the x-axis labels are hard-coded - confirm
# they match the data.
options(repr.plot.width = 5, repr.plot.height = 4)
ggplot(metrics_plot %>% filter(lv1 == "CD4"), aes(x = met_loc, y = Shannon, fill = timepoint)) +
    geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, linewidth = 1) +
    geom_dotplot(binaxis='y', stackdir='center', position=position_dodge(1)) +
    scale_fill_manual(
        values = c("T0" = "#D4D4D4", "T1" = "#3A3B3C"),
        labels = c("T0" = "SCR", "T1" = "C02")
    ) +
    theme_linedraw(base_size = 15) +
    theme(
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
        plot.title = element_text(hjust=0.5)
    ) +
    labs(title = "Eff. CD4 T Cells", x = "Tissue", y = "Shannon Entropy", fill = "Time Point") +
    scale_x_discrete(labels = c("Liver" = "Liver (n=6)", "Lung" = "Lung (n=7)")) +
    #geom_text(aes(x = 1, y = max(Shannon) + 0.3, fontface = "plain",
    #              label = p_val_liver), inherit.aes = FALSE, size = 4, fontface = "plain") +
    #geom_text(aes(x = 2, y = max(Shannon) + 0.3, fontface = "plain",
    #              label = p_val_lung), inherit.aes = FALSE, size = 4, fontface = "plain")
    ylim(3.2, 7.5)
ggsave(filename = file.path(fig_dir, "Boxplot_Eff-CD4_Entropy_Tissue-Timepoint.pdf"), plot = last_plot(), dpi = 300, width = 5, height = 4)

In [None]:
# Median entropy change and its 95% percentile-bootstrap CI for one
# lineage/tissue combination, returned with a precomputed Wilcoxon p-value.
#   df        table with `lv1`, `met_loc` and the `delta_col` column
#   tissue    value of `met_loc` to keep
#   p_val     precomputed p-value, copied into the output unchanged
#   lineage   value of `lv1` to keep
#   delta_col column holding the per-patient differences
# Returns a one-row tibble.
compute_delta_stats <- function(df, tissue, p_val, lineage = "CD4", delta_col = "delta_Shannon") {
  # Differences for the requested lineage and tissue
  selected_rows <- filter(df, lv1 == lineage, met_loc == tissue)
  delta_vals <- pull(selected_rows, {{delta_col}})

  # Resample the median with a fixed seed so the interval is reproducible
  resample_median <- function(values, idx) median(values[idx])
  set.seed(123)
  boot_res <- boot(delta_vals, statistic = resample_median, R = 10000)
  # Elements 4 and 5 of the percentile interval are its lower/upper bounds
  pct_ci <- boot.ci(boot_res, type = "perc")$percent[4:5]

  tibble(
    Tissue = tissue,
    Lineage = lineage,
    Median_Delta = median(delta_vals),
    CI_lower = pct_ci[1],
    CI_upper = pct_ci[2],
    Wilcox_p = p_val
  )
}

# Liver and lung summaries, reusing the paired-test p-values computed above
stats_liver <- compute_delta_stats(metrics_wide, "Liver", p_val_liver, lineage = "CD4")
stats_lung  <- compute_delta_stats(metrics_wide, "Lung",  p_val_lung,  lineage = "CD4")

# Stack both tissues into one table and show it
delta_stats_table <- bind_rows(list(stats_liver, stats_lung))
print(delta_stats_table)

### Tregs

In [None]:
# Wilcoxon tests on Shannon entropy for Tregs (lv1 == "Treg"):
# paired T0 vs T1 within each tissue, unpaired liver vs lung at each time point

# Filter liver values
metrics_wide_liver <- metrics_wide %>% filter(met_loc == "Liver", lv1 == "Treg")

# (Liver) Paired Wilcoxon test
wilcox_test_liver <- wilcox.test(metrics_wide_liver$Shannon_T0, metrics_wide_liver$Shannon_T1, paired = TRUE)
p_val_liver <- signif(wilcox_test_liver$p.value, 3)
print(p_val_liver)

# Filter lung values
metrics_wide_lung <- metrics_wide %>% filter(met_loc == "Lung", lv1 == "Treg")

# (Lung) Paired Wilcoxon test
wilcox_test_lung <- wilcox.test(metrics_wide_lung$Shannon_T0, metrics_wide_lung$Shannon_T1, paired = TRUE)
p_val_lung <- signif(wilcox_test_lung$p.value, 3)
print(p_val_lung)

# (T0) Unpaired Wilcoxon test: liver vs lung at T0
wilcox_test_t0 <- wilcox.test(
    metrics_wide_liver$Shannon_T0,
    metrics_wide_lung$Shannon_T0,
    paired = FALSE
)
p_val_t0 <- signif(wilcox_test_t0$p.value, 3)
print(p_val_t0)

# (T1) Unpaired Wilcoxon test: liver vs lung at T1
# (both sides must use Shannon_T1; the lung side previously read Shannon_T0)
wilcox_test_t1 <- wilcox.test(
    metrics_wide_liver$Shannon_T1,
    metrics_wide_lung$Shannon_T1,
    paired = FALSE
)
p_val_t1 <- signif(wilcox_test_t1$p.value, 3)
print(p_val_t1)

In [None]:
# Plot Shannon diversity comparison for Tregs (T0 vs T1 per tissue)
# Line widths use `linewidth`, matching the scatter plot earlier in the notebook.
# NOTE(review): the sample sizes in the x-axis labels are hard-coded - confirm
# they match the data.
options(repr.plot.width = 5, repr.plot.height = 4)
ggplot(metrics_plot %>% filter(lv1 == "Treg"), aes(x = met_loc, y = Shannon, fill = timepoint)) +
    geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, linewidth = 1) +
    geom_dotplot(binaxis='y', stackdir='center', position=position_dodge(1)) +
    scale_fill_manual(
        values = c("T0" = "#D4D4D4", "T1" = "#3A3B3C"),
        labels = c("T0" = "SCR", "T1" = "C02")
    ) +
    theme_linedraw(base_size = 15) +
    theme(
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
        plot.title = element_text(hjust=0.5)
    ) +
    labs(title = "Tregs", x = "Tissue", y = "Shannon Entropy", fill = "Time Point") +
    scale_x_discrete(labels = c("Liver" = "Liver (n=6)", "Lung" = "Lung (n=7)")) +
    #geom_text(aes(x = 1, y = max(Shannon) + 0.3, fontface = "plain",
    #              label = paste0("p = ", p_val_liver)), inherit.aes = FALSE, size = 5) +
    #geom_text(aes(x = 2, y = max(Shannon) + 0.3, fontface = "plain",
    #              label = paste0("p = ", p_val_lung)), inherit.aes = FALSE, size = 5)
    ylim(3.2, 7.5)
ggsave(filename = file.path(fig_dir, "Boxplot_Tregs_Entropy_Tissue-Timepoint.pdf"), plot = last_plot(), dpi = 300, width = 5, height = 4)

In [None]:
# Median entropy change and its 95% percentile-bootstrap CI for one
# lineage/tissue combination, returned with a precomputed Wilcoxon p-value.
#   df        table with `lv1`, `met_loc` and the `delta_col` column
#   tissue    value of `met_loc` to keep
#   p_val     precomputed p-value, copied into the output unchanged
#   lineage   value of `lv1` to keep
#   delta_col column holding the per-patient differences
# Returns a one-row tibble.
compute_delta_stats <- function(df, tissue, p_val, lineage, delta_col = "delta_Shannon") {
  # Differences for the requested lineage and tissue
  selected_rows <- filter(df, lv1 == lineage, met_loc == tissue)
  delta_vals <- pull(selected_rows, {{delta_col}})

  # Resample the median with a fixed seed so the interval is reproducible
  resample_median <- function(values, idx) median(values[idx])
  set.seed(123)
  boot_res <- boot(delta_vals, statistic = resample_median, R = 10000)
  # Elements 4 and 5 of the percentile interval are its lower/upper bounds
  pct_ci <- boot.ci(boot_res, type = "perc")$percent[4:5]

  tibble(
    Tissue = tissue,
    Lineage = lineage,
    Median_Delta = median(delta_vals),
    CI_lower = pct_ci[1],
    CI_upper = pct_ci[2],
    Wilcox_p = p_val
  )
}

# Liver and lung summaries, reusing the paired-test p-values computed above
stats_liver <- compute_delta_stats(metrics_wide, "Liver", p_val_liver, lineage = "Treg")
stats_lung  <- compute_delta_stats(metrics_wide, "Lung",  p_val_lung, lineage = "Treg")

# Stack both tissues into one table and show it
delta_stats_table <- bind_rows(list(stats_liver, stats_lung))
print(delta_stats_table)

### CD8s

In [None]:
# Wilcoxon tests on Shannon entropy for effector CD8 cells (lv1 == "CD8"):
# paired T0 vs T1 within each tissue, unpaired liver vs lung at each time point

# Filter liver values
metrics_wide_liver <- metrics_wide %>% filter(met_loc == "Liver", lv1 == "CD8")

# (Liver) Paired Wilcoxon test
wilcox_test_liver <- wilcox.test(metrics_wide_liver$Shannon_T0, metrics_wide_liver$Shannon_T1, paired = TRUE)
p_val_liver <- signif(wilcox_test_liver$p.value, 3)
print(p_val_liver)

# Filter lung values
metrics_wide_lung <- metrics_wide %>% filter(met_loc == "Lung", lv1 == "CD8")

# (Lung) Paired Wilcoxon test
wilcox_test_lung <- wilcox.test(metrics_wide_lung$Shannon_T0, metrics_wide_lung$Shannon_T1, paired = TRUE)
p_val_lung <- signif(wilcox_test_lung$p.value, 3)
print(p_val_lung)

# (T0) Unpaired Wilcoxon test: liver vs lung at T0
wilcox_test_t0 <- wilcox.test(
    metrics_wide_liver$Shannon_T0,
    metrics_wide_lung$Shannon_T0,
    paired = FALSE
)
p_val_t0 <- signif(wilcox_test_t0$p.value, 3)
print(p_val_t0)

# (T1) Unpaired Wilcoxon test: liver vs lung at T1
# (both sides must use Shannon_T1; the lung side previously read Shannon_T0)
wilcox_test_t1 <- wilcox.test(
    metrics_wide_liver$Shannon_T1,
    metrics_wide_lung$Shannon_T1,
    paired = FALSE
)
p_val_t1 <- signif(wilcox_test_t1$p.value, 3)
print(p_val_t1)

In [None]:
# Plot Shannon diversity comparison for effector CD8s (T0 vs T1 per tissue)
# Line widths use `linewidth`, matching the scatter plot earlier in the notebook.
# NOTE(review): the sample sizes in the x-axis labels are hard-coded - confirm
# they match the data.
options(repr.plot.width = 5, repr.plot.height = 4)
ggplot(metrics_plot %>% filter(lv1 == "CD8"), aes(x = met_loc, y = Shannon, fill = timepoint)) +
    geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, linewidth = 1) +
    geom_dotplot(binaxis='y', stackdir='center', position=position_dodge(1)) +
    scale_fill_manual(
        values = c("T0" = "#D4D4D4", "T1" = "#3A3B3C"),
        labels = c("T0" = "SCR", "T1" = "C02")
    ) +
    theme_linedraw(base_size = 15) +
    theme(
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
        plot.title = element_text(hjust=0.5)
    ) +
    labs(title = "Eff. CD8 T Cells", x = "Tissue", y = "Shannon Entropy", fill = "Time Point") +
    scale_x_discrete(labels = c("Liver" = "Liver (n=6)", "Lung" = "Lung (n=7)")) +
    #geom_text(aes(x = 1, y = max(Shannon) + 0.2,  fontface = "plain",
    #              label = paste0("p = ", p_val_liver)), inherit.aes = FALSE, size = 5) +
    #geom_text(aes(x = 2, y = max(Shannon) + 0.2, fontface = "plain",
    #              label = paste0("p = ", p_val_lung)), inherit.aes = FALSE, size = 5)
    ylim(3.2, 7.5)
ggsave(filename = file.path(fig_dir, "Boxplot_Eff-CD8_Entropy_Tissue-Timepoint.pdf"), plot = last_plot(), dpi = 300, width = 5, height = 4)

In [None]:
# Median entropy change and its 95% percentile-bootstrap CI for one
# lineage/tissue combination, returned with a precomputed Wilcoxon p-value.
#   df        table with `lv1`, `met_loc` and the `delta_col` column
#   tissue    value of `met_loc` to keep
#   p_val     precomputed p-value, copied into the output unchanged
#   lineage   value of `lv1` to keep
#   delta_col column holding the per-patient differences
# Returns a one-row tibble.
compute_delta_stats <- function(df, tissue, p_val, lineage, delta_col = "delta_Shannon") {
  # Differences for the requested lineage and tissue
  selected_rows <- filter(df, lv1 == lineage, met_loc == tissue)
  delta_vals <- pull(selected_rows, {{delta_col}})

  # Resample the median with a fixed seed so the interval is reproducible
  resample_median <- function(values, idx) median(values[idx])
  set.seed(123)
  boot_res <- boot(delta_vals, statistic = resample_median, R = 10000)
  # Elements 4 and 5 of the percentile interval are its lower/upper bounds
  pct_ci <- boot.ci(boot_res, type = "perc")$percent[4:5]

  tibble(
    Tissue = tissue,
    Lineage = lineage,
    Median_Delta = median(delta_vals),
    CI_lower = pct_ci[1],
    CI_upper = pct_ci[2],
    Wilcox_p = p_val
  )
}

# Liver and lung summaries, reusing the paired-test p-values computed above
stats_liver <- compute_delta_stats(metrics_wide, "Liver", p_val_liver, lineage = "CD8")
stats_lung  <- compute_delta_stats(metrics_wide, "Lung",  p_val_lung, lineage = "CD8")

# Stack both tissues into one table and show it
delta_stats_table <- bind_rows(list(stats_liver, stats_lung))
print(delta_stats_table)

In [None]:
# Paired Wilcoxon tests on clonality (T0 vs T1) for effector CD8 cells,
# run separately for liver and lung metastases

# Liver: one row per patient, so T0/T1 are paired row-wise
metrics_wide_liver <- filter(metrics_wide, met_loc == "Liver", lv1 == "CD8")
wilcox_test_liver <- wilcox.test(
    x = metrics_wide_liver$clonality_T0,
    y = metrics_wide_liver$clonality_T1,
    paired = TRUE
)
p_val_liver <- signif(wilcox_test_liver$p.value, 3)
print(p_val_liver)

# Lung: same procedure
metrics_wide_lung <- filter(metrics_wide, met_loc == "Lung", lv1 == "CD8")
wilcox_test_lung <- wilcox.test(
    x = metrics_wide_lung$clonality_T0,
    y = metrics_wide_lung$clonality_T1,
    paired = TRUE
)
p_val_lung <- signif(wilcox_test_lung$p.value, 3)
print(p_val_lung)

## Longitudinal Shannon entropy assessment of CD4 and CD8 populations stratified by metastatic site

In [None]:
# Rebuild the filtered table: selected patients, lung/liver metastases, and
# clonotypes with a CD4 or CD8 consensus assignment
data_filt <- filter(
    data,
    patient %in% patients_keep,
    met_loc %in% c("Lung", "Liver"),
    CD4_CD8_assignment %in% c("CD4", "CD8")
)

In [None]:
# Diversity metrics at T0 and T1 for every patient x tissue x CD4/CD8 lineage
library(vegan)
metrics_wide <- summarise(
    group_by(data_filt, patient, met_loc, CD4_CD8_assignment),

    # Richness as returned by vegan::specnumber()
    richness_T0 = specnumber(norm_cloneSize_T0),
    richness_T1 = specnumber(norm_cloneSize_T1),

    # Shannon entropy (natural log)
    Shannon_T0 = diversity(norm_cloneSize_T0, index = "shannon", base = exp(1)),
    Shannon_T1 = diversity(norm_cloneSize_T1, index = "shannon", base = exp(1)),

    # Clonality: 1 minus Shannon entropy scaled by log(richness)
    clonality_T0 = 1 - (Shannon_T0 / log(richness_T0)),
    clonality_T1 = 1 - (Shannon_T1 / log(richness_T1)),

    # Change in Shannon entropy, T1 minus T0
    delta_Shannon = Shannon_T1 - Shannon_T0
)
head(metrics_wide)

In [None]:
# Long format for plotting: each "<metric>_<Tn>" column is split into a
# `<metric>` value column and a `timepoint` column
metrics_plot <- pivot_longer(
    select(metrics_wide, -richness_T0, -richness_T1),
    cols = all_of(c(
        "Shannon_T0", "Shannon_T1",
        "clonality_T0", "clonality_T1"
    )),
    names_to = c(".value", "timepoint"),
    names_pattern = "(.*)_(T\\d)"
)
head(metrics_plot)

### CD4s

In [None]:
# Wilcoxon tests on Shannon entropy for CD4 cells (CD4_CD8_assignment == "CD4"):
# paired T0 vs T1 within each tissue, unpaired liver vs lung at each time point

# Filter liver values
metrics_wide_liver <- metrics_wide %>% filter(met_loc == "Liver", CD4_CD8_assignment == "CD4")

# (Liver) Paired Wilcoxon test
wilcox_test_liver <- wilcox.test(metrics_wide_liver$Shannon_T0, metrics_wide_liver$Shannon_T1, paired = TRUE)
p_val_liver <- signif(wilcox_test_liver$p.value, 3)
print(p_val_liver)

# Filter lung values
metrics_wide_lung <- metrics_wide %>% filter(met_loc == "Lung", CD4_CD8_assignment == "CD4")

# (Lung) Paired Wilcoxon test
wilcox_test_lung <- wilcox.test(metrics_wide_lung$Shannon_T0, metrics_wide_lung$Shannon_T1, paired = TRUE)
p_val_lung <- signif(wilcox_test_lung$p.value, 3)
print(p_val_lung)

# (T0) Unpaired Wilcoxon test: liver vs lung at T0
wilcox_test_t0 <- wilcox.test(
    metrics_wide_liver$Shannon_T0,
    metrics_wide_lung$Shannon_T0,
    paired = FALSE
)
p_val_t0 <- signif(wilcox_test_t0$p.value, 3)
print(p_val_t0)

# (T1) Unpaired Wilcoxon test: liver vs lung at T1
# (both sides must use Shannon_T1; the lung side previously read Shannon_T0)
wilcox_test_t1 <- wilcox.test(
    metrics_wide_liver$Shannon_T1,
    metrics_wide_lung$Shannon_T1,
    paired = FALSE
)
p_val_t1 <- signif(wilcox_test_t1$p.value, 3)
print(p_val_t1)

In [None]:
# Boxplot + dotplot of Shannon entropy per tissue and timepoint (CD4 T cells)
options(repr.plot.width = 5, repr.plot.height = 4)

# Long-format metrics restricted to the CD4 lineage
cd4_entropy_df <- metrics_plot %>% filter(CD4_CD8_assignment == "CD4")

cd4_entropy_plot <- ggplot(cd4_entropy_df, aes(x = met_loc, y = Shannon, fill = timepoint)) +
    # Layers: boxes first, individual observations drawn on top
    geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, size = 1) +
    geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(1)) +
    # Scales and labels
    scale_x_discrete(labels = c("Liver" = "Liver (n=6)", "Lung" = "Lung (n=7)")) +
    scale_fill_manual(
        values = c("T0" = "#D4D4D4", "T1" = "#3A3B3C"),
        labels = c("T0" = "SCR", "T1" = "C02")
    ) +
    ylim(3.5, 10) +
    labs(title = "CD4 T Cells", x = "Tissue", y = "Shannon Entropy", fill = "Time Point") +
    # Theme
    theme_linedraw(base_size = 15) +
    theme(
        panel.border = element_rect(color = "black", fill = NA, size = 1.5),
        plot.title = element_text(hjust = 0.5)
    )
    # Optional p-value annotations (disabled):
    #geom_text(aes(x = 1, y = max(Shannon) + 0.3, fontface = "plain",
    #              label = paste0("p = ", p_val_liver)), inherit.aes = FALSE, size = 5) +
    #geom_text(aes(x = 2, y = max(Shannon) + 0.3, fontface = "plain",
    #              label = paste0("p = ", p_val_lung)), inherit.aes = FALSE, size = 5)

# Render in the notebook, then write the same plot to disk
print(cd4_entropy_plot)
ggsave(
    filename = file.path(fig_dir, "Boxplot_CD4_Entropy_Tissue-Timepoint.pdf"),
    plot = cd4_entropy_plot,
    dpi = 300,
    width = 5,
    height = 4
)

In [None]:
library(boot)

# Summarise the change in a metric for one tissue and lineage.
#
# Arguments:
#   df        table with CD4_CD8_assignment, met_loc and the delta column
#   tissue    value of met_loc to keep
#   p_val     precomputed Wilcoxon p-value, copied into the output as-is
#   lineage   value of CD4_CD8_assignment to keep
#   delta_col column holding the per-row delta values
#
# Returns a one-row tibble with the median delta, the bounds of its bootstrap
# percentile confidence interval, and the supplied p-value.
compute_delta_stats <- function(df, tissue, p_val, lineage = "CD4", delta_col = "delta_Shannon") {

  # Delta values for the requested lineage and tissue
  selected_rows <- filter(df, CD4_CD8_assignment == lineage, met_loc == tissue)
  delta_vals <- pull(selected_rows, {{delta_col}})

  # Statistic handed to boot(): median of the resampled observations
  resampled_median <- function(values, idx) median(values[idx])

  # Fixed seed so the bootstrap interval is reproducible between runs
  set.seed(123)
  boot_res <- boot(delta_vals, statistic = resampled_median, R = 10000)

  # Elements 4 and 5 of the percentile interval are its lower/upper bounds
  pct_bounds <- boot.ci(boot_res, type = "perc")$percent[4:5]

  tibble(
    Tissue = tissue,
    Lineage = lineage,
    Median_Delta = median(delta_vals),
    CI_lower = pct_bounds[1],
    CI_upper = pct_bounds[2],
    Wilcox_p = p_val
  )
}

# Per-tissue CD4 summaries, reusing the paired Wilcoxon p-values held in
# p_val_liver / p_val_lung
stats_liver <- compute_delta_stats(metrics_wide, tissue = "Liver", p_val = p_val_liver, lineage = "CD4")
stats_lung  <- compute_delta_stats(metrics_wide, tissue = "Lung",  p_val = p_val_lung,  lineage = "CD4")

# Stack both tissues into one table and show it
delta_stats_table <- bind_rows(stats_liver, stats_lung)
print(delta_stats_table)

### CD8s

In [None]:
# Compute statistical tests on Shannon entropy for liver and lung mets (CD8 T cells)
# - Paired tests compare T0 vs T1 within one tissue (Shannon_T0 and Shannon_T1
#   sit on the same row of metrics_wide, so the vectors are aligned).
# - Unpaired tests compare liver vs lung at a single timepoint.

# Filter liver values
metrics_wide_liver <- metrics_wide %>% filter(met_loc == "Liver", CD4_CD8_assignment == "CD8")

# (Liver) Paired Wilcoxon test
wilcox_test_liver <- wilcox.test(metrics_wide_liver$Shannon_T0, metrics_wide_liver$Shannon_T1, paired = TRUE)
p_val_liver <- signif(wilcox_test_liver$p.value, 3)
print(p_val_liver)

# Filter lung values
metrics_wide_lung <- metrics_wide %>% filter(met_loc == "Lung", CD4_CD8_assignment == "CD8")

# (Lung) Paired Wilcoxon test
wilcox_test_lung <- wilcox.test(metrics_wide_lung$Shannon_T0, metrics_wide_lung$Shannon_T1, paired = TRUE)
p_val_lung <- signif(wilcox_test_lung$p.value, 3)
print(p_val_lung)

# (T0) Unpaired Wilcoxon test: liver vs lung at T0
wilcox_test_t0 <- wilcox.test(
    metrics_wide_liver$Shannon_T0,
    metrics_wide_lung$Shannon_T0,
    paired = FALSE
)
p_val_t0 <- signif(wilcox_test_t0$p.value, 3)
print(p_val_t0)

# (T1) Unpaired Wilcoxon test: liver vs lung at T1
# Both arms must use Shannon_T1; pulling Shannon_T0 for the lung arm would
# compare different timepoints.
wilcox_test_t1 <- wilcox.test(
    metrics_wide_liver$Shannon_T1,
    metrics_wide_lung$Shannon_T1,
    paired = FALSE
)
p_val_t1 <- signif(wilcox_test_t1$p.value, 3)
print(p_val_t1)

In [None]:
# Boxplot + dotplot of Shannon entropy per tissue and timepoint (CD8 T cells)
options(repr.plot.width = 5, repr.plot.height = 4)

# Long-format metrics restricted to the CD8 lineage
cd8_entropy_df <- metrics_plot %>% filter(CD4_CD8_assignment == "CD8")

cd8_entropy_plot <- ggplot(cd8_entropy_df, aes(x = met_loc, y = Shannon, fill = timepoint)) +
    # Layers: boxes first, individual observations drawn on top
    geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, size = 1) +
    geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(1)) +
    # Scales and labels
    scale_x_discrete(labels = c("Liver" = "Liver (n=6)", "Lung" = "Lung (n=7)")) +
    scale_fill_manual(
        values = c("T0" = "#D4D4D4", "T1" = "#3A3B3C"),
        labels = c("T0" = "SCR", "T1" = "C02")
    ) +
    ylim(3.5, 10) +
    labs(title = "CD8 T Cells", x = "Tissue", y = "Shannon Entropy", fill = "Time Point") +
    # Theme
    theme_linedraw(base_size = 15) +
    theme(
        panel.border = element_rect(color = "black", fill = NA, size = 1.5),
        plot.title = element_text(hjust = 0.5)
    )
    # Optional p-value annotations (disabled):
    #geom_text(aes(x = 1, y = max(Shannon) + 0.2,  fontface = "plain",
    #              label = paste0("p = ", p_val_liver)), inherit.aes = FALSE, size = 5) +
    #geom_text(aes(x = 2, y = max(Shannon) + 0.2, fontface = "plain",
    #              label = paste0("p = ", p_val_lung)), inherit.aes = FALSE, size = 5)

# Render in the notebook, then write the same plot to disk
print(cd8_entropy_plot)
ggsave(
    filename = file.path(fig_dir, "Boxplot_CD8_Entropy_Tissue-Timepoint.pdf"),
    plot = cd8_entropy_plot,
    dpi = 300,
    width = 5,
    height = 4
)

In [None]:
library(boot)

# Summarise the change in a metric for one tissue and lineage.
#
# Arguments:
#   df        table with CD4_CD8_assignment, met_loc and the delta column
#   tissue    value of met_loc to keep
#   p_val     precomputed Wilcoxon p-value, copied into the output as-is
#   lineage   value of CD4_CD8_assignment to keep
#   delta_col column holding the per-row delta values
#
# Returns a one-row tibble with the median delta, the bounds of its bootstrap
# percentile confidence interval, and the supplied p-value.
compute_delta_stats <- function(df, tissue, p_val, lineage = "CD8", delta_col = "delta_Shannon") {

  # Delta values for the requested lineage and tissue
  selected_rows <- filter(df, CD4_CD8_assignment == lineage, met_loc == tissue)
  delta_vals <- pull(selected_rows, {{delta_col}})

  # Statistic handed to boot(): median of the resampled observations
  resampled_median <- function(values, idx) median(values[idx])

  # Fixed seed so the bootstrap interval is reproducible between runs
  set.seed(123)
  boot_res <- boot(delta_vals, statistic = resampled_median, R = 10000)

  # Elements 4 and 5 of the percentile interval are its lower/upper bounds
  pct_bounds <- boot.ci(boot_res, type = "perc")$percent[4:5]

  tibble(
    Tissue = tissue,
    Lineage = lineage,
    Median_Delta = median(delta_vals),
    CI_lower = pct_bounds[1],
    CI_upper = pct_bounds[2],
    Wilcox_p = p_val
  )
}

# Per-tissue CD8 summaries, reusing the paired Wilcoxon p-values held in
# p_val_liver / p_val_lung
stats_liver <- compute_delta_stats(metrics_wide, tissue = "Liver", p_val = p_val_liver, lineage = "CD8")
stats_lung  <- compute_delta_stats(metrics_wide, tissue = "Lung",  p_val = p_val_lung,  lineage = "CD8")

# Stack both tissues into one table and show it
delta_stats_table <- bind_rows(stats_liver, stats_lung)
print(delta_stats_table)