# Joint TCR and Phenotype Analyses

### Set Up Env

In [None]:
# Notebook/session setup.
# Widen the table previews first, then move to the project root so that the
# relative path to the shared configuration script resolves.
options(repr.matrix.max.rows = 100, repr.matrix.max.cols = 100)
setwd("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR")

# NOTE(review): Config.R presumably attaches the tidyverse/qs packages and
# defines `root_dir`, both of which are used further down - confirm.
source("code/helper/Config.R", echo = FALSE)

In [None]:
# Load extra libraries
library(vegan)
library(ggpubr) 
library(ggalluvial)

In [None]:
# Import plotting helper functions
source("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR/code/helper/Plotting_Functions.R", echo = FALSE)

In [None]:
# Define figures path
fig_dir <- "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR/out/figs/TCR_Fig_Jan"
# Figures are written both to `fig_dir` and to its "alluvial" subfolder.
# Create both up front (no-op when they already exist) so that the ggsave()
# calls below do not fail on a fresh checkout.
dir.create(file.path(fig_dir, "alluvial"), recursive = TRUE, showWarnings = FALSE)

### Load Tumor 10x Processed TCR Data with matched GEX Profiles

In [None]:
# Load the wide-format TCR/GEX table from its .qs file
data <- file.path(root_dir, "out", "data", "SERP_TCR-GEX_wide_11-2025_v2.qs") %>%
    qread()

In [None]:
# Load the long-format TCR/GEX table from its .qs file
data_long <- file.path(root_dir, "out", "data", "SERP_TCR-GEX_11-2025_v2.qs") %>%
    qread()

### Prepare data

In [None]:
# Drop the single clonotype treated as an outlier and show the remaining
# table dimensions. Rows whose clonotype_id is NA are dropped as well,
# because filter() only keeps rows where the comparison is TRUE.
data <- filter(
    data,
    clonotype_id != "TRBV2_TRBJ1-1_GCCAGGGGGGATGATCCGAAGGGGCGGGGGACTGAAGCTTTC"
)
dim(data)

In [None]:
# Missing normalized clone sizes (T0, T1, EOT) are set to 0
data <- mutate(
    data,
    across(
        c(norm_cloneSize_T0, norm_cloneSize_T1, norm_cloneSize_EOT),
        function(x) replace_na(x, 0)
    )
)

In [None]:
# Consensus lv1 label per clonotype:
#   - Pre-existing with identical T0/T1 labels -> that shared label
#   - De Novo                                   -> T1 label
#   - Lost                                      -> T0 label
#   - anything else (incl. pre-existing clonotypes whose labels disagree)
#                                               -> "not_matched"
# A pre-existing clonotype with a missing T0 or T1 label yields NA, since the
# first ifelse() condition is then NA.
data <- mutate(
    data,
    lv1 = ifelse(
        (presence_status == "Pre-existing") & (lv1_T0 == lv1_T1),
        lv1_T1,
        ifelse(
            presence_status == "De Novo",
            lv1_T1,
            ifelse(presence_status == "Lost", lv1_T0, "not_matched")
        )
    )
)

In [None]:
# Consensus CD4/CD8 assignment per clonotype, same rules as for lv1:
#   - Pre-existing with identical T0/T1 assignment -> that assignment
#   - De Novo                                       -> T1 assignment
#   - Lost                                          -> T0 assignment
#   - anything else                                 -> "not_matched"
data <- mutate(
    data,
    CD4_CD8_assignment = ifelse(
        (presence_status == "Pre-existing") &
            (CD4_CD8_assignment_T0 == CD4_CD8_assignment_T1),
        CD4_CD8_assignment_T1,
        ifelse(
            presence_status == "De Novo",
            CD4_CD8_assignment_T1,
            ifelse(presence_status == "Lost", CD4_CD8_assignment_T0, "not_matched")
        )
    )
)

In [None]:
# Keep the selected patients and the clonotypes from lung or liver
# metastases, then show the resulting table dimensions.
# NOTE(review): this step filters on patient and metastatic site only; no
# CD4/CD8 or time-point filter is applied here. Per the original comment the
# patient list is meant to be those with matched SCR and C02 data - confirm.
patients_keep <- sprintf(
    "P%02d",
    c(1L, 2L, 3L, 10L, 14L, 17L, 20L, 26L, 29L, 31L, 33L, 34L, 35L)
)
data_filt <- filter(
    data,
    patient %in% patients_keep,
    met_loc %in% c("Lung", "Liver")
)
dim(data_filt)

## Compute effector to naïve ratio across metastatic sites and time points

In [None]:
# Compute effector to naïve ratio for both time points
# Output: one row per patient x metastatic site x time point with the column
# `eff_naive_ratio` = (CD4 + Treg + CD8 fractions) / (T Naive/CM fraction).
# NOTE(review): the pivoted column names used below require lv1 to contain the
# levels "CD4", "Treg", "CD8" and "T Naive/CM" - confirm against the annotation.
# NOTE(review): sum() is called without na.rm, so any NA in cloneSize_T0 /
# cloneSize_T1 propagates into the ratio - confirm these columns have no NAs.
data_ratio <- data_filt %>%
    group_by(patient, met_loc, lv1) %>%
    # Total clone size per lv1 class at each time point. With dplyr's default,
    # summarise() peels off the last grouping variable (lv1), so the table
    # stays grouped by patient and met_loc for the steps below.
    summarise(n_T0 = sum(cloneSize_T0), n_T1 = sum(cloneSize_T1)) %>%
    # Totals and per-class fractions within each patient x met_loc group
    mutate(total_T0 = sum(n_T0), total_T1 = sum(n_T1),
           pct_T0 = n_T0/total_T0, pct_T1 = n_T1/total_T1) %>%
    # One column per value/lv1 pair, named <value>_<lv1> (e.g. pct_T0_CD8)
    pivot_wider(names_from = lv1, values_from = c(total_T0, total_T1, n_T0, n_T1, pct_T0, pct_T1)) %>%
    # Ratio of the summed effector-class fractions to the naive/CM fraction
    mutate(eff_naive_ratio_T0 = (sum(pct_T0_CD4) + sum(pct_T0_Treg) + sum(pct_T0_CD8)) / sum(`pct_T0_T Naive/CM`),
           eff_naive_ratio_T1 = (sum(pct_T1_CD4) + sum(pct_T1_Treg) + sum(pct_T1_CD8)) / sum(`pct_T1_T Naive/CM`)) %>%
    select(patient, met_loc, eff_naive_ratio_T0, eff_naive_ratio_T1) %>%
    # Back to long format: the name pattern splits "eff_naive_ratio_T0" into
    # the value column "eff_naive_ratio" and timepoint "T0" (same for T1)
    pivot_longer(
        cols = c(eff_naive_ratio_T0, eff_naive_ratio_T1),
        names_to = c(".value", "timepoint"),
        names_pattern = "(.*)_(T\\d)"
    )
    
head(data_ratio)

In [None]:
# Paired Wilcoxon test of the effector/naive ratio, T0 vs T1, run separately
# for each metastatic site; p-values are rounded to 3 significant digits.
# Pairing relies on the T0 and T1 rows of `data_ratio` being in the same
# patient order within a site.
p_val_liver <- local({
    liver <- filter(data_ratio, met_loc == "Liver")
    ratio_t0 <- pull(filter(liver, timepoint == "T0"), eff_naive_ratio)
    ratio_t1 <- pull(filter(liver, timepoint == "T1"), eff_naive_ratio)
    signif(wilcox.test(ratio_t0, ratio_t1, paired = TRUE)$p.value, 3)
})
p_val_lung <- local({
    lung <- filter(data_ratio, met_loc == "Lung")
    ratio_t0 <- pull(filter(lung, timepoint == "T0"), eff_naive_ratio)
    ratio_t1 <- pull(filter(lung, timepoint == "T1"), eff_naive_ratio)
    signif(wilcox.test(ratio_t0, ratio_t1, paired = TRUE)$p.value, 3)
})

# Plot ratio
# Box + dot plot of the effector / naive-CM ratio per metastatic site, filled
# by time point, with the paired-test p-values printed above each site.
options(repr.plot.width = 5, repr.plot.height = 3.5)
ggplot(data_ratio, aes(x = met_loc, y = eff_naive_ratio, fill = timepoint)) +
        # `linewidth` (not the deprecated `size`) controls line thickness
        geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, linewidth = 1) +
        geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(1)) +
        scale_fill_manual(
            values = c("T0" = "#D4D4D4", "T1" = "#3A3B3C"),
            labels = c("T0" = "SCR", "T1" = "C02")
        ) +
        theme_linedraw(base_size = 15) +
        theme(
            panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
            plot.title = element_text(hjust = 0.5)
        ) +
        labs(y = "Effector / Naive-CM Ratio", fill = "Time Point", x = "") +
        # annotate() draws each label exactly once with a fixed text size.
        # Putting constants inside geom_text(aes(...)) would redraw the label
        # for every data row and send `size` through a scale.
        # x = 1 / 2 are the positions of "Liver" / "Lung" on the discrete axis.
        annotate(
            "text",
            x = c(1, 2),
            y = max(data_ratio$eff_naive_ratio, na.rm = TRUE) * 1.05,
            label = paste0(c(p_val_liver, p_val_lung)),
            size = 5,
            fontface = "plain"
        )
ggsave(filename = file.path(fig_dir, "Pct_Ratio_Eff-Naive_Tissue-TP.pdf"), plot = last_plot(), dpi = 300, width = 5, height = 3.5)

In [None]:
# Spread the ratio back to one row per patient x site, with one column per
# time point (eff_naive_ratio_T0 / eff_naive_ratio_T1), and add the change
# from T0 to T1 as `delta_eff_naive_ratio`.
data_ratio_wide <- mutate(
    pivot_wider(
        data_ratio,
        names_from = timepoint,
        values_from = eff_naive_ratio,
        names_prefix = "eff_naive_ratio_"
    ),
    delta_eff_naive_ratio = eff_naive_ratio_T1 - eff_naive_ratio_T0
)
data_ratio_wide

In [None]:
# Compute effect sizes and non-parametric CIs
library(boot)

# Median delta and 95% percentile-bootstrap CI for one tissue.
#
# Arguments:
#   df        data frame with a `met_loc` column and the delta column
#   tissue    value of `met_loc` to analyse (e.g. "Liver")
#   p_val     precomputed p-value; passed through unchanged to the output
#   delta_col column holding the per-sample deltas
#             (default "delta_eff_naive_ratio")
#
# Returns a one-row tibble with columns Tissue, Median_Delta, CI_lower,
# CI_upper and Wilcox_p. CI_lower / CI_upper are NA when no interval can be
# computed (fewer than two usable values, or all bootstrap medians identical).
#
# Side effect: calls set.seed(123) before bootstrapping so that the interval
# is reproducible.
compute_delta_stats <- function(df, tissue, p_val, delta_col = "delta_eff_naive_ratio") {

  # Subset delta values
  delta_vals <- df %>%
    filter(met_loc == tissue) %>%
    pull({{delta_col}})

  # Drop missing deltas: they would turn the median into NA and break the
  # bootstrap.
  delta_vals <- delta_vals[!is.na(delta_vals)]

  if (length(delta_vals) == 0) {
    warning("No non-missing delta values for tissue '", tissue, "'", call. = FALSE)
  }

  # Median (NA when nothing is left to summarise)
  med <- if (length(delta_vals) > 0) median(delta_vals) else NA_real_

  # Bootstrap CI; stays NA unless an interval can actually be computed
  ci <- c(NA_real_, NA_real_)
  if (length(delta_vals) > 1) {
    median_fun <- function(data, indices) median(data[indices])
    set.seed(123)
    boot_obj <- boot(delta_vals, statistic = median_fun, R = 10000)
    # boot.ci() returns NULL when every bootstrap replicate is identical
    ci_obj <- boot.ci(boot_obj, type = "perc")
    if (!is.null(ci_obj)) {
      ci <- ci_obj$percent[4:5]  # 95% CI lower/upper
    }
  }

  # Return as tibble
  tibble(
    Tissue = tissue,
    Median_Delta = med,
    CI_lower = ci[1],
    CI_upper = ci[2],
    Wilcox_p = p_val
  )
}

# Per-tissue summary rows, reusing the p-values computed for the plot
stats_liver <- compute_delta_stats(
  df = data_ratio_wide, tissue = "Liver", p_val = p_val_liver
)
stats_lung <- compute_delta_stats(
  df = data_ratio_wide, tissue = "Lung", p_val = p_val_lung
)

# Stack both rows into one table and show it
delta_stats_table <- bind_rows(list(stats_liver, stats_lung))
print(delta_stats_table)

## Phenotype transitions of persistent CD4 T cells across time points

In [None]:
# Shorten the Lv2 annotation labels for plotting.
# The same long -> short mapping is applied to both time-point columns;
# labels that are not listed are left as they are.
data_filt <- local({
    short_labels <- c(
        "CD8 resident" = "CD8 Res",
        "CD4 central memory" = "CD4 CM",
        "CD8 activated" = "CD8 Act",
        "CD8 effector" = "CD8 Eff",
        "CD4 central memory pre-Tfh" = "CD4 CM/pTfh",
        "CD4 follicular helper" = "CD4 FH",
        "Tregs activated" = "Tregs Act",
        "CD8 pre-exhausted" = "CD8 pExh",
        "CD8 resident exhausted" = "CD8 ResExh",
        "T proliferating" = "Tpro",
        "Tregs proliferating" = "Tregs Pro",
        "CD8 resident activated" = "CD8 ResAct",
        "CD8 metabolic" = "CD8 Met"
    )
    shorten <- function(x) recode(x, !!!short_labels)
    mutate(data_filt, across(c(annotation_T0, annotation_T1), shorten))
})

In [None]:
# Transition table for pre-existing CD4 clonotypes.
# Keeps clonotypes that are annotated at both time points with one of the
# subsets of interest, counts each T0 -> T1 annotation pair per metastatic
# site, and converts the counts to proportions within each site.
subsets <- c(
    'CD4 CM', 'CD4 CM/pTfh', 'CD4 FH', 'T Naive', 'Th-1', 'Th-17', 'Tpro',
    'Tregs', 'Tregs Act', 'Tregs Pro'
)
cd4_data_lv2 <- data_filt %>%
    # persistent clonotypes with a consensus CD4 assignment
    filter(presence_status == "Pre-existing", CD4_CD8_assignment == "CD4") %>%
    # annotated at both time points
    filter(!is.na(annotation_T0), !is.na(annotation_T1)) %>%
    # exclude CD8 / NC / missing lv1 labels at either time point
    filter(
        !(lv1_T0 %in% c("CD8", "NC", "NA", NA)),
        !(lv1_T1 %in% c("CD8", "NC", "NA", NA))
    ) %>%
    # restrict to the subsets of interest
    filter(annotation_T0 %in% subsets, annotation_T1 %in% subsets) %>%
    group_by(annotation_T0, annotation_T1, met_loc) %>%
    summarise(freq = n()) %>%
    group_by(met_loc) %>%
    mutate(prop = freq / sum(freq))
head(cd4_data_lv2)

In [None]:
# Long version of the CD4 transition table: one row per transition and time
# point, with `time_point` ("T0"/"T1") and the matching `annotation`.
# Stripping the "annotation_" prefix from the column names yields the
# time-point labels directly.
cd4_data_lv2_long <- pivot_longer(
    cd4_data_lv2,
    cols = c(annotation_T0, annotation_T1),
    names_to = "time_point",
    names_prefix = "annotation_",
    values_to = "annotation"
)
head(cd4_data_lv2_long)

In [None]:
# Define color palette: one fixed color per Lv2 annotation label
pal <- c(
    "CD4 CM"      = "#023fa5",
    "CD4 CM/pTfh" = "#7d87b9",
    "CD4 FH"      = "#bec1d4",
    "CD8 IFN"     = "#d6bcc0",
    "CD8 NK-like" = "#bb7784",
    "CD8 Act"     = "#8e063b",
    "CD8 Eff"     = "#4a6fe3",
    "CD8 met"     = "#8595e1",
    "CD8 pExh"    = "#b5bbe3",
    "CD8 Res"     = "#e6afb9",
    "CD8 ResAct"  = "#e07b91",
    "CD8 ResExh"  = "#d33f6a",
    "MAIT-17"     = "#11c638",
    "NK"          = "#8dd593",
    "NK-Tgd"      = "#c6dec7",
    "T Naive"     = "#ead3c6",
    "Tpro"        = "#f0b98d",
    "Tgd-17"      = "#ef9708",
    "Tgd-V1"      = "#0fcfc0",
    "Th-1"        = "#9cded6",
    "Th-17"       = "#d5eae7",
    "Tregs"       = "#f3e1eb",
    "Tregs Act"   = "#f6c4e1",
    "Tregs Pro"   = "#f79cd4",
    # Alias: the annotation recoding in this notebook produces "CD8 Met"
    # (capital M); give it the same color as the "CD8 met" entry, which is
    # kept for backward compatibility.
    "CD8 Met"     = "#8595e1"
)

In [None]:
# Alluvial plot - Liver
# T0 -> T1 annotation flows of the CD4 transition table for liver
# metastases, colored by the T0 annotation; saved as PDF.
options(repr.plot.width = 6, repr.plot.height = 4)
cd4_data_lv2 %>%
    filter(met_loc == "Liver", !is.na(annotation_T0), !is.na(annotation_T1)) %>%
    ggplot(aes(axis1 = annotation_T0, axis2 = annotation_T1, y = prop)) +
    geom_alluvium(aes(fill = annotation_T0)) +
    geom_stratum(aes(axis1 = annotation_T0, fill = annotation_T0), alpha = 0.5) +
    scale_fill_manual(values = pal) +
    scale_color_manual(values = pal) +
    labs(title = "Liver CD4", fill = "Lv2 SCR", y = "Proportion") +
    theme_linedraw(base_size = 15) +
    theme(
        plot.title = element_text(hjust = 0.5, face = "bold"),
        legend.position = "right",
        panel.grid = element_blank(),
        panel.border = element_rect(color = "black", linewidth = 1.5),
        # the x axis only separates the two time points; hide its decorations
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()
    )
ggsave(
    filename = file.path(fig_dir, "alluvial", "CD4_Lv2_Liver_Alluvial.pdf"),
    plot = last_plot(),
    width = 6, height = 4, dpi = 300
)

In [None]:
# Alluvial plot - Lung
# T0 -> T1 annotation flows of the CD4 transition table for lung
# metastases, colored by the T0 annotation; saved as PDF.
options(repr.plot.width = 6, repr.plot.height = 4)
cd4_data_lv2 %>%
    filter(met_loc == "Lung", !is.na(annotation_T0)) %>%
    ggplot(aes(axis1 = annotation_T0, axis2 = annotation_T1, y = prop)) +
    geom_alluvium(aes(fill = annotation_T0)) +
    geom_stratum(aes(axis1 = annotation_T0, fill = annotation_T0), alpha = 0.5) +
    scale_fill_manual(values = pal) +
    scale_color_manual(values = pal) +
    labs(title = "Lung CD4", fill = "Lv2 SCR", y = "Proportion") +
    theme_linedraw(base_size = 15) +
    theme(
        plot.title = element_text(hjust = 0.5, face = "bold"),
        legend.position = "right",
        panel.grid = element_blank(),
        panel.border = element_rect(color = "black", linewidth = 1.5),
        # the x axis only separates the two time points; hide its decorations
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()
    )
ggsave(
    filename = file.path(fig_dir, "alluvial", "CD4_Lv2_Lung_Alluvial.pdf"),
    plot = last_plot(),
    width = 6, height = 4, dpi = 300
)

## Phenotype transitions of persistent CD8 T cells across time points

In [None]:
# Transition table for pre-existing CD8 clonotypes.
# Keeps clonotypes whose T0 and T1 annotations are both among the subsets of
# interest, counts each T0 -> T1 annotation pair per metastatic site, and
# converts the counts to proportions within each site.
subsets <- c(
    'T Naive', 'CD8 Res', 'CD8 ResAct', 'CD8 ResExh', 'CD8 Act', 'CD8 Eff',
    'CD8 pExh', 'Tpro'
)

cd8_data_lv2 <- data_filt %>%
    # persistent clonotypes with a consensus CD8 assignment
    filter(presence_status == "Pre-existing", CD4_CD8_assignment == "CD8") %>%
    # exclude CD4 / NC / missing lv1 labels at either time point
    filter(
        !(lv1_T0 %in% c("CD4", "NC", NA)),
        !(lv1_T1 %in% c("CD4", "NC", NA))
    ) %>%
    # restrict to the subsets of interest
    filter(annotation_T0 %in% subsets, annotation_T1 %in% subsets) %>%
    group_by(annotation_T0, annotation_T1, met_loc) %>%
    summarise(freq = n()) %>%
    group_by(met_loc) %>%
    mutate(prop = freq / sum(freq))
head(cd8_data_lv2)

In [None]:
# Alluvial plot - Liver
# T0 -> T1 annotation flows of the CD8 transition table for liver
# metastases, colored by the T0 annotation; saved as PDF.
library(RColorBrewer)
options(repr.plot.width = 6, repr.plot.height = 4)
cd8_data_lv2 %>%
    filter(met_loc == "Liver", !is.na(annotation_T0)) %>%
    ggplot(aes(axis1 = annotation_T0, axis2 = annotation_T1, y = prop)) +
    geom_alluvium(aes(fill = annotation_T0)) +
    geom_stratum(aes(axis1 = annotation_T0, fill = annotation_T0), alpha = 0.5) +
    scale_fill_manual(values = pal) +
    labs(title = "Liver CD8", fill = "Lv2 SCR", y = "Proportion") +
    theme_linedraw(base_size = 15) +
    theme(
        plot.title = element_text(hjust = 0.5, face = "bold"),
        legend.position = "right",
        panel.grid = element_blank(),
        panel.border = element_rect(color = "black", linewidth = 1.5),
        # the x axis only separates the two time points; hide its decorations
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()
    )
ggsave(
    filename = file.path(fig_dir, "alluvial", "CD8_Lv2_Liver_Alluvial.pdf"),
    plot = last_plot(),
    width = 6, height = 4, dpi = 300
)

In [None]:
# Alluvial plot - Lung
# T0 -> T1 annotation flows of the CD8 transition table for lung
# metastases, colored by the T0 annotation; saved as PDF.
library(RColorBrewer)
options(repr.plot.width = 6, repr.plot.height = 4)
cd8_data_lv2 %>%
    filter(met_loc == "Lung", !is.na(annotation_T0)) %>%
    ggplot(aes(axis1 = annotation_T0, axis2 = annotation_T1, y = prop)) +
    geom_alluvium(aes(fill = annotation_T0)) +
    geom_stratum(aes(axis1 = annotation_T0, fill = annotation_T0), alpha = 0.5) +
    scale_fill_manual(values = pal) +
    labs(title = "Lung CD8", fill = "Lv2 SCR", y = "Proportion") +
    theme_linedraw(base_size = 15) +
    theme(
        plot.title = element_text(hjust = 0.5, face = "bold"),
        legend.position = "right",
        panel.grid = element_blank(),
        panel.border = element_rect(color = "black", linewidth = 1.5),
        # the x axis only separates the two time points; hide its decorations
        axis.title.x = element_blank(),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank()
    )
ggsave(
    filename = file.path(fig_dir, "alluvial", "CD8_Lv2_Lung_Alluvial.pdf"),
    plot = last_plot(),
    width = 6, height = 4, dpi = 300
)

## Quantify clonal classes across T cell subtypes

In [None]:
# Reload the long-format table; from here on `data` refers to the long
# table, replacing the wide one used above.
data <- file.path(root_dir, "out", "data", "SERP_TCR-GEX_11-2025_v2.qs") %>%
    qread()
head(data, n = 3)

In [None]:
# Create clonal size classes:
#   - "Singlet"         : cloneSize == 1
#   - "Lowly Expanded"  : expanded clones (cloneSize > 1) at or below the
#                         median log10 normalized clone size of expanded clones
#   - "Highly Expanded" : expanded clones above that median
data <- data %>%
    mutate(cloneClass_2 = case_when(
        cloneSize == 1 ~ "Singlet",
        cloneSize > 1 ~ as.character(cut(
            log10(norm_cloneSize),
            breaks = quantile(log10(norm_cloneSize[cloneSize > 1]),
                              probs = c(0, 0.5, 1), na.rm = TRUE),
            labels = c("Lowly Expanded (< Median)", "Highly Expanded (> Median)"),
            # The lowest break is the minimum itself; without include.lowest
            # the smallest expanded clones fall outside (min, median] and
            # would get NA instead of "Lowly Expanded".
            include.lowest = TRUE
        ))
    ))
table(data$cloneClass_2)

In [None]:
# Per patient, time point, site and annotation: fraction of cells in each
# clonal class. These per-patient fractions are then summarised across
# patients (median, quartiles, mean, standard error, number of patients).
patients_keep <- c("P01", "P02", "P03", "P10", "P14", "P17", "P20", "P26", "P29", "P31", "P33", "P34", "P35")
data_exp <- data %>%
    ungroup() %>%
    filter(
        patient %in% patients_keep,
        timepoint != "EOT",
        met_loc %in% c("Liver", "Lung"),
        # drop cells without a clonal class; test for missingness explicitly
        # instead of comparing against the string "NA"
        !is.na(cloneClass_2)
    ) %>%
    # cells per clonal class within patient x time point x site x annotation
    group_by(patient, timepoint, met_loc, annotation, cloneClass_2) %>%
    summarise(n = n()) %>%
    # clonal-class proportions within each patient's annotation
    group_by(patient, met_loc, timepoint, annotation) %>%
    mutate(prop = n / sum(n)) %>%
    # summary of the patient-level proportions
    group_by(timepoint, met_loc, annotation, cloneClass_2) %>%
    summarise(
        median_prop = median(prop, na.rm = TRUE),
        q1 = quantile(prop, 0.25, na.rm = TRUE),
        q3 = quantile(prop, 0.75, na.rm = TRUE),
        mean_prop = mean(prop, na.rm = TRUE),
        se = sd(prop, na.rm = TRUE) / sqrt(n()),
        n = n(),
        .groups = "drop"
    )

head(data_exp)

In [None]:
# Relabel the time points for display: T0 -> SCR, T1 -> C02
data_exp <- mutate(
    data_exp,
    timepoint = recode(timepoint, T0 = "SCR", T1 = "C02")
)

In [None]:
# Stacked bars (rescaled to 1) of the median patient-level clonal-class
# proportions for a selection of T cell subtypes, faceted by time point
# (rows) and metastatic site (columns); saved as PDF.
subset <- c(
    'CD4 follicular helper', 'Tregs', 'Tregs activated', 'Tregs proliferating',
    'CD8 activated', 'CD8 effector', 'CD8 pre-exhausted', 'CD8 resident',
    'CD8 resident activated', 'CD8 resident exhausted', 'MAIT-17'
)
options(repr.plot.width = 7, repr.plot.height = 5)

# Factor levels fix the order of the bars and facets; annotations outside
# `subset` become NA and are filtered out below.
# NOTE(review): `cloneClass` (reversed level order) is created here, but the
# plot below maps fill to `cloneClass_2` - confirm which column is intended.
data_exp <- mutate(
    data_exp,
    annotation = factor(annotation, subset),
    cloneClass = factor(cloneClass_2, levels = rev(levels(factor(cloneClass_2)))),
    timepoint = factor(timepoint, levels = c("SCR", "C02"))
)

data_exp %>%
    filter(!is.na(annotation), annotation %in% subset) %>%
    ggplot(aes(x = annotation, y = median_prop, fill = cloneClass_2)) +
    geom_bar(stat = "identity", position = "fill") +
    scale_fill_manual(
        values = c("Singlet" = "#140b34", "Lowly Expanded (< Median)" = "#84206b", "Highly Expanded (> Median)" = "#f19f4e"),
        labels = c("Singlet" = "Singlet", "Lowly Expanded (< Median)" = "Lowly Expanded", "Highly Expanded (> Median)" = "Highly Expanded")
    ) +
    facet_grid(timepoint~met_loc) +
    labs(x = "", y = "Scaled Patient-Prop. Median", fill = "Clonal Class") +
    theme_linedraw(base_size = 15) +
    theme(
        legend.position = "none",
        panel.grid = element_blank(),
        panel.border = element_rect(color = "black", linewidth = 1.5),
        strip.background = element_rect(fill = "lightgray", color = "black"),
        strip.text = element_text(color = "black", face = "bold"),
        axis.title.x = element_blank(),
        axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
    )
ggsave(
    filename = file.path(fig_dir, "CloneClass_Subset-Subtypes_StackBar.pdf"),
    plot = last_plot(),
    width = 7, height = 5, dpi = 300
)