# Blood Clonotypes Dynamics

### Env Setup

In [None]:
# Project configuration: set the working directory, tune notebook display
# limits, and run the shared config script (presumably loads the packages used
# below — verify in Config.R).
# NOTE(review): `warn = -1` silences every R warning for the rest of the
# session, including ggplot2's "removed rows" warnings — confirm this is wanted.
setwd("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR")
options(
    repr.matrix.max.rows = 100,
    repr.matrix.max.cols = 100,
    warn = -1
)
source("code/helper/Config.R", echo = FALSE)

In [None]:
# Run the project's plotting helper script so its definitions are available in
# this session.
source(
    "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR/code/helper/Plotting_Functions.R",
    echo = FALSE
)

In [None]:
# Output directory for every figure saved by this notebook.
fig_dir <- "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR/out/figs/TCR_Fig_Jan/blood"
# ggsave() does not create a missing directory on its own in non-interactive
# use, so make sure the target exists (no-op when it is already there).
dir.create(fig_dir, recursive = TRUE, showWarnings = FALSE)

### Load OS TCR data with DE information (wide)

In [None]:
# Load the wide-format table (one row per clonotype, with per-time-point
# frequency/count columns and differential-expansion calls, as used below).
data_dir <- "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR/out/data"
data_raw <- qread(
    file = file.path(data_dir, "SERP_Blood_OS_TCR_DiffExp_Wide_12-2025_v2.qs"),
    nthreads = 32
)

### Prepare data

In [None]:
# Keep only the liver- and lung-metastasis patients with both blood time
# points; P15 has C01-C02 instead of SCR-C02.
patients_keep <- c(
    "P01", "P02", "P03", "P10", "P14",
    "P15", "P19", "P20", "P21"
)
data <- filter(data_raw, Patient %in% patients_keep)
# Number of clonotype rows retained
nrow(data)

In [None]:
# P15 (lung met) has no SCR sample: use its C01 values in the SCR columns so it
# can be analysed together with the SCR-C02 patients. Only the frequency and the
# event call are remapped here.
data <- mutate(
    data,
    SCR_frequency = ifelse(Patient == "P15", C01_frequency, SCR_frequency),
    `SCR-C02_event` = ifelse(Patient == "P15", `C01-C02_event`, `SCR-C02_event`)
)

In [None]:
# Drop clones that have no frequency at both SCR and C02, i.e. keep those
# observed at SCR, at C02, or at both.
data <- filter(data, !(is.na(SCR_frequency) & is.na(C02_frequency)))

In [None]:
# Number of clones (rows) remaining after the SCR/C02 presence filter
nrow(data)

## SCR vs. C02 Clonal Scatter

geom_hex() divides the 2D plot area into hexagonal bins and colors each bin according to the number of observations it contains, visualizing local point density instead of individual points.

In [None]:
# Global clonal scatter drawn as hexagonal density bins (C02 on x, SCR on y),
# with a 1e-4 pseudocount added before the log10 transform.
# NOTE(review): NA frequencies are only replaced by 0 in a later cell, so at
# this point clones seen at a single time point give log10(NA) and are dropped
# from this plot — confirm that is intended.
options(repr.plot.width = 4, repr.plot.height = 4)

p <- ggplot(
        data,
        aes(x = log10(C02_frequency + 1e-4), y = log10(SCR_frequency + 1e-4))
    ) +
    # Layers, in drawing order: rasterised hex bins, identity line, and grey
    # guide lines at the pseudocount level
    ggrastr::rasterize(geom_hex(bins = 50)) +
    geom_abline(slope = 1, intercept = 0, color = "black") +
    geom_hline(yintercept = log10(1e-4), color = "lightgrey", linewidth = 1) +
    geom_vline(xintercept = log10(1e-4), color = "lightgrey", linewidth = 1) +
    # Scales: log-scaled bin counts, identical limits on both axes
    scale_fill_viridis_c(trans = "log", option = "rocket") +
    xlim(log10(1e-4), -1) +
    ylim(log10(1e-4), -1) +
    guides(size = "none", fill = "none") +
    labs(
        x = expression(log[10]("Clonal Proportion C02")),
        y = expression(log[10]("Clonal Proportion SCR")),
        title = "Peripheral Blood"
    ) +
    theme_linedraw(base_size = 15) +
    theme(
        legend.position = "bottom",
        panel.grid = element_blank(),
        panel.border = element_rect(color = "black", linewidth = 1.5),
        axis.title = element_text(hjust = 0.5),
        plot.title = element_text(hjust = 0.5),
        axis.title.y = element_text(margin = margin(r = 15)),
        axis.title.x = element_text(margin = margin(t = 10))
    )
print(p)
ggsave(
    filename = file.path(fig_dir, "ClonalScatter_2D-Density_rasterized_2.pdf"),
    plot = p, dpi = 300, width = 5, height = 5
)

## Differential Expansion Status ClonalScatter

In [None]:
# Count clones per SCR-C02 event call (NA calls are not shown by table())
table(data$`SCR-C02_event`)

In [None]:
# A missing frequency means the clone was not observed at that time point:
# encode it as 0 for both SCR and C02.
data <- replace_na(data, list(SCR_frequency = 0, C02_frequency = 0))

In [None]:
# Global scatter of clones with an SCR-C02 event call, one point per clone,
# filled by the event category.
# NOTE(review): frequencies use a 1e-4 pseudocount, but the grey guide lines are
# drawn at log10(0.00014) — confirm the offset is deliberate.
options(repr.plot.width = 8, repr.plot.height = 5)
geom_params <- list(shape = 21, alpha = 0.5, stroke = 0.2, color = "black")

p <- ggplot(
        filter(data, !is.na(`SCR-C02_event`)),
        aes(
            x = log10(C02_frequency + 1e-4),
            y = log10(SCR_frequency + 1e-4),
            fill = `SCR-C02_event`
        )
    ) +
    # Layers, in drawing order: rasterised points, identity line, guide lines
    ggrastr::rasterize(do.call(geom_point, geom_params)) +
    geom_abline(slope = 1, intercept = 0, color = "black") +
    geom_hline(yintercept = log10(0.00014), color = "lightgrey", linewidth = 1) +
    geom_vline(xintercept = log10(0.00014), color = "lightgrey", linewidth = 1) +
    # Scales
    scale_fill_manual(
        values = c(
            "not_related" = "gray",
            "de_novo_contracted" = "#d31f11",
            "contracted" = "#f47a00",
            "expanded" = "#62c8d3",
            "de_novo_expanded" = "#007191"
        ),
        labels = c(
            "not_related" = "Not Event Related",
            "de_novo_contracted" = "Contracted (De Novo)",
            "contracted" = "Contracted (Pre-Existing)",
            "expanded" = "Expanded (Pre-Existing)",
            "de_novo_expanded" = "Expanded (De Novo)"
        )
    ) +
    xlim(log10(1e-4), -1) +
    ylim(log10(1e-4), -1) +
    guides(size = "none") +
    labs(
        x = expression(log[10]("Frequency C02")),
        y = expression(log[10]("Frequency SCR")),
        fill = "",
        title = "Event-Related Blood Clones"
    ) +
    theme_linedraw(base_size = 15) +
    theme(
        legend.position = "right",
        panel.grid = element_blank(),
        panel.border = element_rect(color = "black", linewidth = 1.5),
        axis.title = element_text(hjust = 0.5),
        plot.title = element_text(hjust = 0.5),
        axis.title.y = element_text(margin = margin(r = 15)),
        axis.title.x = element_text(margin = margin(t = 10))
    )
print(p)
ggsave(
    filename = file.path(fig_dir, "ClonalScatter_DE_rasterized.pdf"),
    plot = p, dpi = 300, width = 8, height = 5
)

## Quantification of Event Related vs. Non Event Related Clones at C02 (pre-existing, de novo)

In [None]:
# Collapse the event call into a binary label: any expanded/contracted category
# (pre-existing or de novo) is "Event Related"; everything else, including NA
# calls, is "Not Event Related".
data <- mutate(
    data,
    `SCR-C02_de` = if_else(
        `SCR-C02_event` %in% c("expanded", "de_novo_expanded", "contracted", "de_novo_contracted"),
        "Event Related",
        "Not Event Related"
    )
)

In [None]:
# Counts, then fractions, of event-related vs. not-event-related clones
table(data$`SCR-C02_de`)
prop.table(table(data$`SCR-C02_de`))

In [None]:
# Annotate each clone with its patient's metastasis site: the four listed
# patients are lung, all remaining patients are liver.
data <- mutate(
    data,
    met_loc = if_else(Patient %in% c("P15", "P19", "P20", "P21"), "Lung", "Liver")
)

In [None]:
# Patients remaining in the filtered clone table
unique(data$Patient)

In [None]:
# Patients present in the per-patient summary table. `de_data` is only created
# in a later cell, so skip this check when it does not exist yet instead of
# aborting a top-to-bottom run with "object 'de_data' not found".
if (exists("de_data")) unique(de_data$Patient)

In [None]:
# Per patient: proportion of clones detected at C02 that are event related.
# count() replaces group_by() + summarise(n = n()), which depended on
# summarise()'s implicit regrouping; the result is explicitly ungrouped.
# NOTE(review): a patient with no event-related clone at C02 gets no row here
# and is therefore left out of the mean/median computed from this table.
de_data <- data %>%
    filter(C02_counts > 0) %>% # clones present at C02 only
    count(Patient, `SCR-C02_de`, met_loc) %>%
    group_by(Patient, met_loc) %>%
    mutate(prop = n / sum(n)) %>% # denominator: all C02 clones of the patient
    ungroup() %>%
    filter(`SCR-C02_de` == "Event Related") # show only event-related proportions

de_data

In [None]:
# Mean across patients of the proportion of event-related clones at C02
mean(de_data$prop)

In [None]:
# Median across patients of the proportion of event-related clones at C02
median(de_data$prop)

## Quantification of Pre-Existing vs. De Novo Event Related Clones at C02

In [None]:
# Per patient: composition of the event-related clones present at C02
# (contracted, expanded pre-existing, expanded de novo).
# count() replaces group_by() + summarise(n = n()) and its implicit regrouping;
# the result is explicitly ungrouped.
de_data <- data %>%
    filter(
        `SCR-C02_event` %in% c("contracted", "expanded", "de_novo_expanded"), # event-related categories kept at C02
        C02_counts > 0 # clones present at C02 only
    ) %>%
    count(Patient, `SCR-C02_event`) %>%
    group_by(Patient) %>%
    mutate(
        prop = n / sum(n), # no median here: values are shown as a boxplot
        clr_prop = log(prop) - mean(log(prop)) # centred log-ratio within patient
    ) %>%
    ungroup()

de_data

In [None]:
# Boxplot + dots of the per-patient proportions of each event category at C02,
# with paired Wilcoxon tests between categories.
library(ggpubr)
options(repr.plot.width = 5, repr.plot.height = 5)

# Display labels shared by the fill and x scales
event_labels <- c(
    "contracted" = "Contracted (Pre-Existing)",
    "expanded" = "Expanded (Pre-Existing)",
    "de_novo_expanded" = "Expanded (De Novo)"
)
# Fix the category order on the x axis
de_data$`SCR-C02_event` <- factor(
    de_data$`SCR-C02_event`,
    levels = c("contracted", "expanded", "de_novo_expanded")
)

ggplot(de_data, aes(x = `SCR-C02_event`, y = prop, fill = `SCR-C02_event`)) +
    # `linewidth` replaces the deprecated `size` for line thickness
    geom_boxplot(width = 3, outlier.shape = NA, color = "black", alpha = 0.3, linewidth = 1) +
    geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(1)) +
    labs(y = "Proportion", x = "Event Related Clones", fill = "", title = "C02") +
    theme_linedraw(base_size = 15) +
    theme(
        legend.position = "none",
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
        axis.text.x = element_text(angle = 15, vjust = 0.5, hjust = 0.5),
        panel.grid = element_blank(),
        plot.title = element_text(hjust = 0.5)
    ) +
    # Single fill scale (the former scale_fill_identity() was overridden by it)
    scale_fill_manual(
        values = c("contracted" = "#f47a00", "expanded" = "#62c8d3", "de_novo_expanded" = "#007191"),
        labels = event_labels
    ) +
    scale_x_discrete(labels = event_labels) +
    # NOTE(review): paired = TRUE pairs observations by their order within each
    # category — assumes every patient has exactly one row per category, in the
    # same patient order; confirm.
    stat_compare_means(
        aes(group = `SCR-C02_event`),
        method = "wilcox.test",
        paired = TRUE,
        comparisons = list(
            c("contracted", "expanded"),
            c("expanded", "de_novo_expanded"),
            c("contracted", "de_novo_expanded")
        ),
        label.y = c(0.7, 0.8, 0.9)
    ) +
    ylim(0, 1)
ggsave(filename = file.path(fig_dir, "Boxplot_DE_C02.pdf"), plot = last_plot(), dpi = 300, width = 5, height = 5)
# Identical p-values across comparisons are expected: all patients show the same
# rank pattern (contracted < expanded pre-existing < expanded de novo).

## Quantification of Differentially Expanded vs. Contracted Clones

Among event-related clonotypes, what fraction are expanded vs contracted?

In [None]:
# Direction of the differential event: "Expanded" and "Contracted" each pool the
# pre-existing and de novo categories; any other value (including NA) becomes
# "Not Event Related".
data <- data %>%
    mutate(
        `SCR-C02_diff` = case_when(
            `SCR-C02_event` %in% c("expanded", "de_novo_expanded") ~ "Expanded",
            `SCR-C02_event` %in% c("contracted", "de_novo_contracted") ~ "Contracted",
            TRUE ~ "Not Event Related"
        )
    )
# Sanity check: pooled classes against the original event calls
table(data$`SCR-C02_diff`)
table(data$`SCR-C02_event`)

In [None]:
# Per patient: proportion of expanded and of contracted clones among ALL of the
# patient's clones. Not-event-related clones stay in the denominator and are
# removed only after the proportions have been computed.
# count() replaces group_by() + summarise(n = n()) and its implicit regrouping;
# the result is explicitly ungrouped.
de_data <- data %>%
    count(Patient, `SCR-C02_diff`) %>%
    group_by(Patient) %>%
    mutate(
        prop = n / sum(n), # no median here: values are shown as a boxplot
        clr_prop = log(prop) - mean(log(prop)) # centred log-ratio within patient
    ) %>%
    ungroup() %>%
    filter(`SCR-C02_diff` != "Not Event Related")
head(de_data)

In [None]:
# Difference between the median per-patient proportion of expanded clones and
# that of contracted clones
with(
    de_data,
    median(prop[`SCR-C02_diff` == "Expanded"]) - median(prop[`SCR-C02_diff` == "Contracted"])
)

In [None]:
# Boxplot of per-patient proportions of expanded vs. contracted clones, with
# dots coloured by metastasis site and a paired Wilcoxon test.
library(ggpubr)
options(repr.plot.width = 4, repr.plot.height = 4)

# This plot needs `met_loc` for the dot colours. The table carrying it
# (`de_data_2`) is only created in a later cell, so build it here to keep the
# notebook runnable top to bottom.
de_data_2 <- data %>%
    count(Patient, `SCR-C02_diff`, met_loc) %>%
    group_by(Patient) %>%
    mutate(
        prop = n / sum(n), # denominator: all clones of the patient
        clr_prop = log(prop) - mean(log(prop))
    ) %>%
    ungroup() %>%
    filter(`SCR-C02_diff` != "Not Event Related")

ggplot(de_data_2, aes(x = `SCR-C02_diff`, y = prop, fill = `SCR-C02_diff`)) +
    # `linewidth` replaces the deprecated `size` for line thickness
    geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, linewidth = 1) +
    geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(0), aes(fill = met_loc)) +
    labs(y = "Proportion", x = "", title = "Event Related Clones", fill = "") +
    theme_linedraw(base_size = 15) +
    theme(
        legend.position = "none",
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
        axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
        plot.title = element_text(hjust = 0.5)
    ) +
    # Single fill scale covering both the box classes and the dot tissues (the
    # former scale_fill_identity() was overridden by it)
    scale_fill_manual(
        values = c("Contracted" = "#E44D09", "Expanded" = "#319DB2", "Lung" = "#1a80bb", "Liver" = "#a00000")
    ) +
    # NOTE(review): paired = TRUE pairs observations by their order within each
    # class — assumes every patient has both an Expanded and a Contracted row;
    # confirm.
    stat_compare_means(
        method = "wilcox.test",
        paired = TRUE,
        label = "p.format", # or "p.signif"
        label.x = 1.25,
        label.y = 0.014
    ) +
    # NOTE(review): ylim() removes values outside [0, 0.015] before the boxplot
    # and the test are computed — confirm no proportion exceeds 0.015.
    ylim(0, 0.015)
ggsave(filename = file.path(fig_dir, "Boxplot_DE_Exp-Cont.pdf"), plot = last_plot(), dpi = 300, width = 4, height = 4)

In [None]:
# Per patient: proportion of expanded and of contracted clones among ALL of the
# patient's clones, carrying the metastasis site along. Not-event-related clones
# stay in the denominator and are removed only after the proportions are
# computed.
# count() replaces group_by() + summarise(n = n()) and its implicit regrouping;
# the result is explicitly ungrouped.
de_data_2 <- data %>%
    count(Patient, `SCR-C02_diff`, met_loc) %>%
    group_by(Patient) %>%
    mutate(
        prop = n / sum(n), # no median here: values are shown as a boxplot
        clr_prop = log(prop) - mean(log(prop)) # centred log-ratio within patient
    ) %>%
    ungroup() %>%
    filter(`SCR-C02_diff` != "Not Event Related")
head(de_data_2)

In [None]:
# Boxplot of per-patient proportions of expanded vs. contracted clones, split
# and coloured by metastasis site, with an unpaired Wilcoxon test.
library(ggpubr)
options(repr.plot.width = 4, repr.plot.height = 4)
ggplot(de_data_2, aes(x = `SCR-C02_diff`, y = prop, fill = met_loc)) +
    # `linewidth` replaces the deprecated `size` for line thickness
    geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, linewidth = 1) +
    geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(1)) +
    labs(y = "Proportion", x = "", title = "Event Related Clones", fill = "") +
    theme_linedraw(base_size = 15) +
    theme(
        legend.position = "none",
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
        axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
        plot.title = element_text(hjust = 0.5)
    ) +
    # Single fill scale (the former scale_fill_identity() was overridden by it)
    scale_fill_manual(
        values = c("Lung" = "#1a80bb", "Liver" = "#a00000")
    ) +
    stat_compare_means(
        method = "wilcox.test",
        paired = FALSE,
        label = "p.format", # or "p.signif"
        label.x = 1.25,
        label.y = 0.014
    ) +
    # NOTE(review): ylim() removes values outside [0, 0.015] before the boxplot
    # and the test are computed — confirm no proportion exceeds 0.015.
    ylim(0, 0.015)
ggsave(filename = file.path(fig_dir, "Boxplot_DE_Exp-Cont_Tissue.pdf"), plot = last_plot(), dpi = 300, width = 4, height = 4)

## Quantification of Differentially Expanded and Contracted De Novo and Pre-Existing Clones

For each patient and condition, we identified differentially expanded clonotypes, classified them as de novo or pre-existing, computed the proportion of de novo (or pre-existing) clonotypes among all expanded clonotypes, summarized these patient-level proportions by condition using the median, and compared conditions using a nonparametric Wilcoxon test. We also computed the proportion of DE clones at C02 for each condition (by dividing the number of DE clones by the total number of clonotypes of that sample).

In [None]:
# Per patient: composition of the event-related clones across the four
# categories (contracted / expanded x pre-existing / de novo).
# count() replaces group_by() + summarise(n = n()) and its implicit regrouping;
# the result is explicitly ungrouped.
de_data <- data %>%
    filter(
        !is.na(`SCR-C02_event`), # drop clonotypes without an SCR-C02 call
        `SCR-C02_event` != "not_related" # keep only the significant ones
    ) %>%
    count(Patient, `SCR-C02_event`, met_loc) %>%
    group_by(Patient) %>%
    mutate(
        prop = n / sum(n), # denominator: event-related clones of the patient
        clr_prop = log(prop) - mean(log(prop)) # centred log-ratio within patient
    ) %>%
    ungroup()

# Fix the category order on the x axis
de_data$`SCR-C02_event` <- factor(
    de_data$`SCR-C02_event`,
    levels = c("de_novo_contracted", "contracted", "expanded", "de_novo_expanded")
)

# Display labels shared by the fill and x scales
event_labels <- c(
    "not_related" = "Not Event Related",
    "de_novo_contracted" = "Contracted (De Novo)",
    "contracted" = "Contracted (Pre-Existing)",
    "expanded" = "Expanded (Pre-Existing)",
    "de_novo_expanded" = "Expanded (De Novo)"
)

options(repr.plot.width = 6, repr.plot.height = 5)
ggplot(de_data, aes(x = `SCR-C02_event`, y = prop, fill = `SCR-C02_event`)) +
    # `linewidth` replaces the deprecated `size` for line thickness
    geom_boxplot(width = 4, outlier.shape = NA, color = "black", alpha = 0.3, linewidth = 1) +
    geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(0), aes(fill = met_loc)) +
    labs(y = "Proportion", x = "", title = "Event Related Clones", fill = "") +
    theme_linedraw(base_size = 15) +
    theme(
        legend.position = "none",
        panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
        axis.text.x = element_text(angle = 15, vjust = 0.5, hjust = 0.5),
        plot.title = element_text(hjust = 0.5)
    ) +
    # Single fill scale covering both the box categories and the dot tissues
    # (the former scale_fill_identity() was overridden by it)
    scale_fill_manual(
        values = c(
            "not_related" = "gray", "de_novo_contracted" = "#d31f11", "contracted" = "#f47a00",
            "expanded" = "#62c8d3", "de_novo_expanded" = "#007191",
            "Lung" = "#1a80bb", "Liver" = "#a00000"
        ),
        labels = event_labels
    ) +
    scale_x_discrete(labels = event_labels) +
    # NOTE(review): paired = TRUE pairs observations by their order within each
    # category — assumes every patient has exactly one row in all four
    # categories, in the same patient order; confirm.
    stat_compare_means(
        method = "wilcox.test",
        paired = TRUE,
        label = "p.format", # or "p.signif"
        comparisons = list(
            c("de_novo_expanded", "de_novo_contracted"),
            c("de_novo_expanded", "contracted"),
            c("expanded", "de_novo_contracted"),
            c("de_novo_expanded", "expanded"),
            c("contracted", "de_novo_contracted"),
            c("expanded", "contracted")
        ),
        label.y = c(0.95, 0.9, 0.85, 0.8, 0.75, 0.7)
    ) +
    ylim(0, 1)
ggsave(filename = file.path(fig_dir, "Boxplot_DE.pdf"), plot = last_plot(), dpi = 300, width = 6, height = 5)