# T Cell Signature Scoring Evaluation

### Env Setup

In [None]:
# Project bootstrap: switch to the repository root, set notebook display
# limits, and load the project configuration script.
# NOTE(review): the absolute path ties this notebook to one filesystem layout.
setwd("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR")
options(
    repr.matrix.max.rows = 100,
    repr.matrix.max.cols = 100,
    warn = -1  # a negative value makes R ignore all warnings from here on
)
source("code/helper/Config.R", echo = FALSE)

In [None]:
# Import plotting helper functions
# (sourced through an absolute path; echo = FALSE keeps the sourced
# expressions out of the notebook output)
source("/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR/code/helper/Plotting_Functions.R", echo = FALSE)

In [None]:
# Define figures path
# Directory that the ggsave() calls further down write their PDF files into.
fig_dir <- "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR/out/figs/TCR_Fig_Jan/signatures"

### Load cell-level signature scores data

In [None]:
# Read the cell-level signature score table (comma-separated, first row holds
# the column names).
# NOTE(review): the file name ends in ".csv.csv" - confirm this matches the
# file on disk.
data <- read.table(
    file = "/scratch_isilon/groups/singlecell/gdeuner/SERPENTINE_TCR/out/data/scored_enriched_cells.csv.csv",
    sep = ",",
    header = TRUE
)

In [None]:
# Quick inspection of the loaded table: first rows, dimensions, and the number
# of distinct values in the clonotype_id column.
head(data)
dim(data)
length(unique(data$clonotype_id))

In [None]:
# Keep only the cells recorded at Timepoint 2 (referred to as C02)
data <- filter(data, Timepoint == 2)

### Summarise scores at the patient level

In [None]:
# Average each signature score per Patient x Tissue combination.
# `.groups = "drop"` returns an ungrouped tibble, so later verbs do not
# silently operate per patient, and it avoids the summarise() regrouping
# message.
summ_data <- data %>%
    group_by(Patient, Tissue) %>%
    summarise(
        across(
            all_of(c("Activity", "Activation", "Effector", "Residency", "Tumor_reactivity")),
            mean
        ),
        .groups = "drop"
    )
summ_data

In [None]:
# Show the patient-level summaries ordered by ascending Residency score
arrange(summ_data, Residency)

In [None]:
# Compute the effect size for the Residency score: difference in group medians
# (Lung - Liver) with a non-parametric percentile bootstrap confidence interval.
residency_liver <- summ_data %>% filter(Tissue == "Liver") %>% pull(Residency)
residency_lung <- summ_data %>% filter(Tissue == "Lung") %>% pull(Residency)

# Fail early with a clear message instead of bootstrapping an empty group
stopifnot(length(residency_liver) > 0, length(residency_lung) > 0)

# na.rm = TRUE keeps the point estimate consistent with the bootstrap below
delta <- median(residency_lung, na.rm = TRUE) -
    median(residency_liver, na.rm = TRUE)
cat("Delta:", delta)

# Compute non-parametric CI
set.seed(123)
n_boot <- 10000

# Resample by index: sample(x) on a length-one numeric x >= 1 draws from 1:x
# instead of returning x, so indices are sampled and then used to subset.
# For longer vectors this consumes the random stream exactly like
# sample(x, replace = TRUE).
resample_with_replacement <- function(x) {
    x[sample.int(length(x), replace = TRUE)]
}

boot_delta <- replicate(n_boot, {
    lung_star <- resample_with_replacement(residency_lung)
    liver_star <- resample_with_replacement(residency_liver)
    median(lung_star, na.rm = TRUE) - median(liver_star, na.rm = TRUE)
})

# 95% percentile CI
ci <- quantile(boot_delta, probs = c(0.025, 0.975))

cat("\n95% CI:", ci, "\n")

In [None]:
# Wilcoxon effect size for Residency between tissues (unpaired), with a 95%
# percentile bootstrap confidence interval, computed with rstatix.
library(rstatix)
set.seed(124)

# One Residency value per Patient x Tissue, returned without grouping
summ_data_patient <- summarise(
    group_by(summ_data, Patient, Tissue),
    Residency = mean(Residency),
    .groups = "drop"
)

eff <- rstatix::wilcox_effsize(
    summ_data_patient,
    Residency ~ Tissue,
    paired = FALSE,
    ci = TRUE,
    conf.level = 0.95,
    ci.type = "perc",
    nboot = 1000
)
eff

In [None]:
# Patient-level boxplots, one per signature score, comparing tissues.
# Each plot is annotated with an unpaired Wilcoxon test p-value, printed to
# the notebook and saved as a PDF under `fig_dir`.
library(ggpubr)
options(repr.plot.width = 4, repr.plot.height = 4)

# Make sure the output directory exists before ggsave() writes into it
dir.create(fig_dir, recursive = TRUE, showWarnings = FALSE)

for (score in c("Activity", "Activation", "Effector", "Residency", "Tumor_reactivity")) {
    # `.data[[score]]` selects the column named by the loop variable
    p <- ggplot(summ_data, aes(x = Tissue, y = .data[[score]], fill = Tissue)) +
        geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, size = 1) +
        geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(1)) +
        labs(y = paste(score, "Score"), fill = "") +
        theme_linedraw(base_size = 15) +
        theme(
            legend.position = "none",
            panel.border = element_rect(color = "black", fill = NA, size = 1.5),
            axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
            #panel.grid = element_blank(),
            plot.title = element_text(hjust = 0.5)
        ) +
        # Only fill scale of the plot: the earlier scale_fill_identity() was
        # always replaced by this one and has been removed.
        scale_fill_manual(
            values = c("Lung" = "#1a80bb", "Liver" = "#a00000")
        ) +
        stat_compare_means(
            method = "wilcox.test",
            paired = FALSE,
            label = "p.format",   # or "p.signif"
            label.x = 1.25
        )
    print(p)
    # paste0() builds the name without the spaces that paste() inserted
    # around the score (e.g. "Boxplot_thrs2both_ Activity .pdf").
    ggsave(
        filename = file.path(fig_dir, paste0("Boxplot_thrs2both_", score, ".pdf")),
        plot = p,
        dpi = 300,
        width = 4,
        height = 4
    )
}

In [None]:
# Summarize scores stratified by presence status: mean of each signature score
# per Patient x Tissue x presence_status, plus a combined "Tissue status"
# label column (tissue_pres).
# `.groups = "drop"` returns an ungrouped tibble and avoids the summarise()
# regrouping message.
summ_data_2 <- data %>%
    group_by(Patient, Tissue, presence_status) %>%
    summarise(
        across(
            all_of(c("Activity", "Activation", "Effector", "Residency", "Tumor_reactivity")),
            mean
        ),
        .groups = "drop"
    ) %>%
    mutate(tissue_pres = paste(Tissue, presence_status))
summ_data_2

In [None]:
# Patient-level boxplots, one per signature score, split by tissue (x axis)
# and presence status (fill). Each plot is annotated with a Wilcoxon test
# p-value, printed to the notebook and saved as a PDF under `fig_dir`.
library(ggpubr)
options(repr.plot.width = 4, repr.plot.height = 4)

# Fix the plotting order of the two statuses; any presence_status value
# outside these two levels becomes NA.
summ_data_2$presence_status <- factor(summ_data_2$presence_status, levels = c("Pre-existing", "De Novo"))

# Make sure the output directory exists before ggsave() writes into it
dir.create(fig_dir, recursive = TRUE, showWarnings = FALSE)

for (score in c("Activity", "Activation", "Effector", "Residency", "Tumor_reactivity")) {
    # `.data[[score]]` selects the column named by the loop variable
    p <- ggplot(summ_data_2, aes(x = Tissue, y = .data[[score]], fill = presence_status)) +
        geom_boxplot(width = 2, outlier.shape = NA, color = "black", alpha = 0.3, size = 1) +
        geom_dotplot(binaxis = "y", stackdir = "center", position = position_dodge(1)) +
        labs(y = paste(score, "Score"), fill = "") +
        theme_linedraw(base_size = 15) +
        theme(
            legend.position = "none",
            panel.border = element_rect(color = "black", fill = NA, size = 1.5),
            axis.text.x = element_text(angle = 0, vjust = 0.5, hjust = 0.5),
            #panel.grid = element_blank(),
            plot.title = element_text(hjust = 0.5)
        ) +
        # Only fill scale of the plot: the earlier scale_fill_identity() was
        # always replaced by this one and has been removed.
        scale_fill_manual(
            values = c("Lost" = "#a559aa", "Pre-existing" = "#f0c571", "De Novo" = "#59a89c")
        ) +
        # NOTE(review): paired = TRUE presumably relies on every patient having
        # both statuses within a tissue, in matching row order - confirm
        # against the data.
        stat_compare_means(
            method = "wilcox.test",
            #comparisons = list(c("Pre-existing", "De Novo")),
            paired = TRUE,
            label = "p.format"   # or "p.signif"
            #label.x = 1.25
        )
    print(p)
    # paste0() builds the name without the spaces that paste() inserted
    # around the score.
    ggsave(
        filename = file.path(fig_dir, paste0("Boxplot_Presence-status_thrs2both_", score, ".pdf")),
        plot = p,
        dpi = 300,
        width = 4,
        height = 4
    )
}