In [1]:
library(Seurat)
library(liana)
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggpubr)
library(cowplot)
library(rstatix)

# negated %in%: TRUE for elements of x that are absent from table
`%notin%` <- function(...) !`%in%`(...)

# fix the RNG state so stochastic steps are repeatable
set.seed(123)

# project layout
root.dir <- "/project/bicistronic_carT_gbm_Jackie/"
# figures will be outputted in this folder
figures.dir <- paste0(root.dir, "Figures/")
# liana results (table) will be outputted to this folder
csf.liana.output.dir <- paste0(root.dir, "ProcessedData/", "liana/", "csf/")

Loading required package: SeuratObject

Loading required package: sp


Attaching package: ‘SeuratObject’


The following objects are masked from ‘package:base’:

    intersect, t



Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘cowplot’


The following object is masked from ‘package:ggpubr’:

    get_legend



Attaching package: ‘rstatix’


The following object is masked from ‘package:stats’:

    filter




In [2]:
# patient IDs P1..P12 (high-dose patients)
all.high.dose.patients <- paste0("P", 1:12)
# discovery cohort; every other patient is treated as validation downstream
discovery.cohort.patients <- paste0("P", c(6, 1, 8, 5, 2, 7, 4, 3))

# Start here with CSF Seurat object

In [3]:
# Load the annotated CSF Seurat object and derive per-cell metadata columns.
csf <- readRDS(paste0(root.dir, "ProcessedData/seurat/all_csf_annotated_final.rds"))

# Recode columns with vectorised ifelse() instead of one function call per cell.
# ifelse() keeps the names of its test vector, so cell barcodes are carried over.
csf$response.status <- ifelse(csf$Responder_Status, "Responder", "Non-responder")

# %in% drops names, so restore the cell barcodes explicitly
patient.per.cell <- csf$Patient
csf$Cohort <- setNames(
    ifelse(patient.per.cell %in% discovery.cohort.patients, "Discovery", "Validation"),
    names(patient.per.cell)
)

# order timepoints chronologically
csf$Day <- factor(csf$Day, levels = c("D0", "D7", "D21"))

# fix typo in the level-3 annotation
level3 <- csf$cell_type_level3
csf$cell_type_level3 <- ifelse(level3 == "CCD56bright_CD16neg_NK", "CD56bright_CD16neg_NK", level3)
csf$cell_type <- csf$cell_type_level3

In [4]:
# combine monocyte/macrophage cell types together
# Vectorised recode: avoids apply() over meta.data rows, which coerces the whole
# data frame to a character matrix and evaluates case_when() once per cell.
mono.mac.types <- c('Monocyte', 'BAM', 'Microglia', 'Macrophage')
cell.type.broad <- csf$cell_type            # named by cell barcode
cell.type.broad[cell.type.broad %in% mono.mac.types] <- 'Mono/Mac'
csf$cell_type_broad <- cell.type.broad

# keep the fine-grained labels, then make the broad labels the working cell type
csf$cell_type_fine <- csf$cell_type
csf$cell_type <- csf$cell_type_broad
cell.type.col <- "cell_type_broad"
Idents(csf) <- cell.type.col

In [5]:
# create a map of patient to response status
# Unique "<Patient>_<Responder_Status>" keys are split back into their two parts.
patient.response <- unique(paste0(csf$Patient, "_", csf$Responder_Status))
split_data <- strsplit(patient.response, "_")

patient.ids <- sapply(split_data, function(parts) parts[1])
status.labels <- sapply(split_data, function(parts) {
  if (parts[2] == "TRUE") "Responder" else "Non-responder"
})

# named vector: names are patient IDs, values are response labels
patient_response <- setNames(status.labels, patient.ids)

patient_map <- tibble(
  Patient = names(patient_response),
  response.status = unlist(patient_response)
)

In [6]:
# subset to high dose only
csf.high.dose <- subset(csf, subset = Dose_Class == "High")

# sample IDs across all doses, and across the high-dose subset only
sample_ids <- unique(csf$sample_id)
sample_ids_high_dose <- unique(csf.high.dose$sample_id)

In [7]:
# run liana on each sample; results are cached as one TSV per sample
# make sure the output folder exists before any (expensive) LIANA run is started
dir.create(csf.liana.output.dir, recursive = TRUE, showWarnings = FALSE)

for (sample_id_use in sample_ids) {
    print(sample_id_use)

    filename <- paste0(csf.liana.output.dir, sample_id_use, "_liana_test.tsv")

    # skip samples whose result file already exists (nothing is read here)
    if (file.exists(filename)) {
        next
    }

    csf_subset <- subset(csf, subset = sample_id == sample_id_use)
    print(dim(csf_subset))
    print(table(csf_subset$cell_type_broad))

    # run the LIANA methods on this sample, then aggregate them into one table
    liana_test <- liana_wrap(csf_subset, idents_col = cell.type.col) %>%
        liana_aggregate()
    write.table(liana_test, filename, sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE)
}

[1] "P1D0"
[1] "P1D7"
[1] "P1D21"
[1] "P2D0"
[1] "P2D7"
[1] "P2D21"
[1] "P3D0"
[1] "P3D7"
[1] "P3D21"
[1] "P4D0"
[1] "P4D7"
[1] "P4D21"
[1] "P5D0"
[1] "P5D7"
[1] "P5D21"
[1] "P6D0"
[1] "P6D7"
[1] "P6D21"
[1] "P7D0"
[1] "P7D7"
[1] "P7D21"
[1] "P8D0"
[1] "P8D7"
[1] "P8D21"
[1] "P9D0"
[1] "P9D7"
[1] "P10D0"
[1] "P10D7"
[1] "P11D0"
[1] "P11D7"
[1] "P12D0"
[1] "P12D7"
[1] "P13D0"
[1] "P13D7"
[1] "P14D0"
[1] "P14D7"
[1] "P15D0"
[1] "P15D7"
[1] "P16D0"
[1] "P16D7"
[1] "P17D0"
[1] "P17D7"
[1] "P18D0"
[1] "P18D7"


In [8]:
# read in LIANA results and aggregate all samples
# The combined table is cached on disk; it is rebuilt only if the cache is missing.
filename <- paste0(csf.liana.output.dir, "all_samples_combined_unfiltered_liana.tsv")
if (!file.exists(filename)) {

    # vectorised typo fix for cell type labels
    fix.nk.typo <- function(x) ifelse(x == "CCD56bright_CD16neg_NK", "CD56bright_CD16neg_NK", x)

    # one annotated table per high-dose sample; bound once at the end instead of
    # growing a data frame with rbind() inside the loop
    liana_per_sample <- lapply(as.character(sample_ids_high_dose), function(sample_id_use) {

        liana_test <- read.csv(paste0(csf.liana.output.dir, sample_id_use, "_liana_test.tsv"), sep = "\t")

        # sample IDs look like "P1D7": text before "D" is the patient, after it the day.
        # Base strsplit() is used because stringr (str_split) is not attached in this file.
        id.parts <- strsplit(sample_id_use, "D", fixed = TRUE)[[1]]
        patient_id <- id.parts[1]
        day <- paste0("D", id.parts[2])

        # add metadata
        liana_test$source <- fix.nk.typo(liana_test$source) # fix typo
        liana_test$target <- fix.nk.typo(liana_test$target) # fix typo

        liana_test$sample_id <- sample_id_use
        liana_test$Patient <- patient_id
        liana_test$Day <- day

        # add response status
        response.status <- unname(patient_map$response.status[patient_map$Patient == patient_id])
        if (length(response.status) != 1) {
            stop("Expected exactly one response status for patient ", patient_id,
                 " (sample ", sample_id_use, "), found ", length(response.status), call. = FALSE)
        }
        liana_test$response.status <- response.status

        liana_test
    })
    liana_combined <- do.call(rbind, liana_per_sample)

    liana_combined <- liana_combined %>%
        mutate(label = paste0(source, ":", ligand.complex, " -> ", target, ":", receptor.complex),
               interaction.label = paste0(ligand.complex, " -> ", receptor.complex))

    # bin the aggregate rank; rows with a missing rank fall into "> 0.1"
    liana_combined$significance.level <- case_when(
        liana_combined$aggregate_rank <= 0.05 ~ "<= 0.05",
        liana_combined$aggregate_rank <= 0.1 ~ "<= 0.1",
        TRUE ~ "> 0.1"
    )

    write.table(liana_combined, filename, sep = "\t", quote = FALSE, col.names = TRUE, row.names = FALSE)
} else {

    # read in aggregated file if it already exists
    liana_combined <- read.csv(filename, sep = "\t")
}

In [9]:
# patients shown in the per-patient plots: all high-dose patients
all.patients <- all.high.dose.patients
# drop the full object; not used downstream
rm(list = "csf")

In [10]:
# order timepoints chronologically
day.levels <- c("D0", "D7", "D21")
liana_combined$Day <- factor(liana_combined$Day, levels = day.levels)

In [11]:
# swap CTLA4 from receptor to ligand
# Rows where CTLA4 is the receptor get source/target and ligand/receptor exchanged;
# all other rows are kept unchanged and placed first.
ctla4.as.receptor <- liana_combined %>% filter(receptor.complex == "CTLA4")
other.df <- liana_combined %>% filter(receptor.complex != "CTLA4")

swapped.df <- ctla4.as.receptor %>%
    rename(target = source,
           source = target,
           receptor.complex = ligand.complex,
           ligand.complex = receptor.complex)

# rbind() matches data frame columns by name, so the renamed columns line up
liana_combined <- rbind(other.df, swapped.df)

# rebuild both labels so they reflect the swapped direction
liana_combined <- liana_combined %>%
    mutate(label = paste0(source, ":", ligand.complex, " -> ", target, ":", receptor.complex),
           interaction.label = paste0(ligand.complex, " -> ", receptor.complex))

In [12]:
# Ligand/receptor pairs of interest, each given as c(ligand, receptor).
# Names must match the ligand.complex / receptor.complex values in the LIANA
# tables, which are compared by exact string equality when plotting.
immune.checkpoints <- list(c("TGFB1", "TGFBR1_TGFBR2"),
                           c("BTLA", "TNFRSF14"),
                           c("TNFSF18", "TNFRSF18"),
                           c("CTLA4", "CD80"),
                           c("CTLA4", "CD86"),
                           c("ENTPD1", "ADORA2A"),
                           c("CD274", "PDCD1"),
                           c("PDCD1LG2", "PDCD1"),
                           c("LGALS9", "HAVCR2"),
                           c("HLA-DPA1", "LAG3"),
                           c("CD80", "CD28"),
                           c("CD86", "CD28"),
                           c("CD40LG", "CD40"),
                           c("TNFSF4", "TNFRSF4"), # OX40
                           c("TNFSF9", "TNFRSF9"), # 41BB
                           # "CD278" is a protein alias; the gene symbol is ICOS. The rest of
                           # this analysis uses gene symbols (CTLA4, PDCD1, ...), so the alias
                           # could never match a receptor.complex value.
                           c("ICOSLG", "ICOS"), # ICOS (CD278)
                           c("CD70", "CD27")
                          )
# "<ligand> -> <receptor>" labels, in the same order as the list above
canonical.interactions <- vapply(immune.checkpoints,
                                 function(pair) paste0(pair[1], " -> ", pair[2]),
                                 character(1))

In [13]:
# Dot plots of selected ligand-receptor interactions, per patient and timepoint.
#
# For each entry of `cell.types` the LIANA table is restricted to rows where that
# cell type is the source (facet.by.source = TRUE) or the target (FALSE), and to
# the ligand/receptor pairs in `interactions.use`. Missing combinations are filled
# with 0 so every facet shows the same grid. One panel is drawn per cell type and
# all panels are stacked into a single PDF written to `filepath`.
#
# Arguments:
#   liana_df            data frame with columns source, target, ligand.complex,
#                       receptor.complex, interaction.label, Patient,
#                       response.status, sca.LRscore, natmi.edge_specificity and
#                       the column named by `timepoint.var`.
#   filepath            path of the PDF to write.
#   cell.types          cell types to plot (one panel each).
#   interactions.use    list of c(ligand, receptor) pairs to keep.
#   levels.interactions order of interaction labels on the y axis; NA keeps the
#                       default ordering.
#   levels.celltypes    order of target cell types; NA keeps the default ordering.
#   facet.by.source     TRUE: fix the source, put targets on x; FALSE: the reverse.
#   timepoint.var       name of the timepoint column used for the facet rows.
#   plot.width          PDF width; plot.height defaults to 10 per panel when NA.
#
# Reads the global `all.patients` for the patient facet order.
# Returns NULL invisibly; prints "no interactions" if nothing matched.
plot.canonical.interactions <- function(liana_df, filepath, cell.types, interactions.use=immune.checkpoints, 
                                        levels.interactions=canonical.interactions, 
                                        levels.celltypes = NA,
                                        facet.by.source=TRUE, timepoint.var = "Day",
                                        plot.width = 30, plot.height = NA) {

    # the x axis shows the partner of the fixed cell type
    if (facet.by.source) {
        x <- "target"
        x.label <- "Target"
    } else {
        x <- "source"
        x.label <- "Source"
    }

    plot.list <- list()
    for (s in cell.types) {

        if (facet.by.source) {
            liana_filter_df <- liana_df %>% dplyr::filter(source == s)
        } else {
            liana_filter_df <- liana_df %>% dplyr::filter(target == s)
        }

        # keep only the requested pairs, in the order they are listed; .env$ makes
        # sure the local values are used even if a column has the same name
        interactions.df <- dplyr::bind_rows(lapply(interactions.use, function(pair) {
            ligand <- pair[1]
            receptor <- pair[2]
            liana_filter_df %>%
                dplyr::filter(ligand.complex == .env$ligand, receptor.complex == .env$receptor)
        }))

        if (nrow(interactions.df) == 0) {
            next
        }

        # add 0 for missing data
        df <- interactions.df %>%
            complete(source, target, nesting(ligand.complex, receptor.complex, interaction.label),
                     .data[[timepoint.var]], nesting(Patient, response.status),
                     fill = list(sca.LRscore = 0, natmi.edge_specificity = 0))

        # honour the caller-supplied label order (previously the global
        # canonical.interactions was used regardless of this argument)
        if (!all(is.na(levels.interactions))) {
            df$interaction.label <- factor(df$interaction.label, levels = levels.interactions)
        }

        if (!all(is.na(levels.celltypes))) {
            df$target <- factor(df$target, levels = levels.celltypes)
        }

        df$`Interaction strength` <- df$sca.LRscore
        df$`Interaction specificity` <- df$natmi.edge_specificity
        df$Patient <- factor(df$Patient, levels = all.patients)

        p <- ggplot(df, aes(.data[[x]], interaction.label)) +
            geom_point(aes(
                           fill = `Interaction strength`,
                           size = `Interaction specificity`),
                      shape = 21, stroke = 0.8) +
            theme_bw() +
            theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
            scale_fill_viridis_c(limits = c(0, 1)) +
            scale_size(
                limits = c(0, 1),
                range = c(0, 10),
                breaks = c(0, 0.2, 0.4, 0.6, 0.8, 1)
            ) +
            facet_grid(cols = vars(Patient), rows = vars(.data[[timepoint.var]])) +
            xlab(x.label) +
            ylab("Interaction")

        plot.list[[s]] <- p
    }

    if (length(plot.list) == 0) {
        print("no interactions")
        return(invisible(NULL))
    }

    if (length(plot.list) > 1) {
        p <- plot_grid(plotlist = plot.list, ncol = 1, labels = names(plot.list))
    } else {
        # single panel: title it with the cell type instead of a grid label
        p <- plot.list[[1]] + ggtitle(names(plot.list))
    }

    if (is.na(plot.height)) {
        plot.height <- length(plot.list) * 10
    }

    pdf(filepath, width = plot.width, height = plot.height)
    # close the device even if drawing fails, so no PDF device is left open
    on.exit(dev.off(), add = TRUE)
    print(p)
    invisible(NULL)
}

In [14]:
# Dot plots of selected ligand-receptor interactions at one timepoint, averaged
# over patients.
#
# For each entry of `cell.types` the LIANA table is restricted to rows where that
# cell type is the source (facet.by.source = TRUE) or the target (FALSE), to the
# pairs in `interactions.use`, and to rows whose `timepoint.var` column equals
# `timepoint.of.interest`. Missing source/target/interaction/patient combinations
# are filled with 0, then sca.LRscore and natmi.edge_specificity are averaged
# over patients. One panel is drawn per cell type; all panels go into one PDF.
#
# Arguments:
#   liana_df              data frame with columns source, target, ligand.complex,
#                         receptor.complex, interaction.label, Patient,
#                         response.status, sca.LRscore, natmi.edge_specificity
#                         and the column named by `timepoint.var`.
#   filepath              path of the PDF to write.
#   cell.types            cell types to plot (one panel each).
#   timepoint.of.interest value of the timepoint column to keep.
#   interactions.use      list of c(ligand, receptor) pairs to keep.
#   levels.interactions   order of interaction labels on the y axis; NA keeps the
#                         default ordering.
#   levels.celltypes      order of target cell types; NA keeps the default ordering.
#   facet.by.source       TRUE: fix the source, put targets on x; FALSE: the reverse.
#   timepoint.var         name of the timepoint column.
#   plot.width            PDF width; plot.height defaults to 10 per panel when NA.
#
# Returns NULL invisibly; prints "no interactions" if nothing matched.
plot.canonical.interactions.patients.aggregated <- function(liana_df, filepath, cell.types, timepoint.of.interest, interactions.use=immune.checkpoints, 
                                        levels.interactions=canonical.interactions, 
                                        levels.celltypes = NA,
                                        facet.by.source=TRUE, timepoint.var = "Day",
                                        plot.width = 30, plot.height = NA) {

    # the x axis shows the partner of the fixed cell type
    if (facet.by.source) {
        x <- "target"
        x.label <- "Target"
    } else {
        x <- "source"
        x.label <- "Source"
    }

    plot.list <- list()
    for (s in cell.types) {

        if (facet.by.source) {
            liana_filter_df <- liana_df %>% dplyr::filter(source == s)
        } else {
            liana_filter_df <- liana_df %>% dplyr::filter(target == s)
        }

        # keep only the requested pairs at the requested timepoint, in list order;
        # .env$ makes sure the local values are used even if a column shares the name
        interactions.df <- dplyr::bind_rows(lapply(interactions.use, function(pair) {
            ligand <- pair[1]
            receptor <- pair[2]
            liana_filter_df %>%
                dplyr::filter(ligand.complex == .env$ligand,
                              receptor.complex == .env$receptor,
                              .data[[timepoint.var]] == .env$timepoint.of.interest)
        }))

        if (nrow(interactions.df) == 0) {
            next
        }

        # add 0 for missing data
        interactions.df <- interactions.df %>%
            complete(source, target, nesting(ligand.complex, receptor.complex, interaction.label),
                     nesting(Patient, response.status),
                     fill = list(sca.LRscore = 0, natmi.edge_specificity = 0))

        # average over patients (zeros from the fill step are included in the mean)
        df <- interactions.df %>%
            group_by(source, target, ligand.complex, receptor.complex, interaction.label) %>%
            summarize(mean.sca.LRscore = mean(sca.LRscore),
                      mean.natmi.edge_specificity = mean(natmi.edge_specificity),
                      .groups = "drop")

        # honour the caller-supplied label order (previously the global
        # canonical.interactions was used regardless of this argument)
        if (!all(is.na(levels.interactions))) {
            df$interaction.label <- factor(df$interaction.label, levels = levels.interactions)
        }

        if (!all(is.na(levels.celltypes))) {
            df$target <- factor(df$target, levels = levels.celltypes)
        }

        df$`Interaction strength` <- df$mean.sca.LRscore
        df$`Interaction specificity` <- df$mean.natmi.edge_specificity

        p <- ggplot(df, aes(.data[[x]], interaction.label)) +
            geom_point(aes(
                           fill = `Interaction strength`,
                           size = `Interaction specificity`),
                      shape = 21, stroke = 0.8) +
            theme_bw() +
            theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
            scale_fill_viridis_c(limits = c(0, 1)) +
            # NOTE(review): specificity values above the upper limit (0.1) fall outside
            # the scale and are presumably what triggers ggplot's "Removed N rows"
            # warning, i.e. those points are not drawn - confirm this is intended.
            scale_size(
                limits = c(0, 0.1),
                range = c(0, 8),
                breaks = c(0, 0.025, 0.05, 0.075, 0.1) # the size scaling can be customized as needed
            ) +
            ylab("Interaction") +
            xlab(x.label)

        plot.list[[s]] <- p
    }

    if (length(plot.list) == 0) {
        print("no interactions")
        return(invisible(NULL))
    }

    if (length(plot.list) > 1) {
        p <- plot_grid(plotlist = plot.list, ncol = 1, labels = names(plot.list))
    } else {
        # single panel: title it with the cell type and timepoint
        p <- plot.list[[1]] + ggtitle(paste0(names(plot.list), " interactions at ", timepoint.of.interest))
    }

    if (is.na(plot.height)) {
        plot.height <- length(plot.list) * 10
    }

    pdf(filepath, width = plot.width, height = plot.height)
    # close the device even if drawing fails, so no PDF device is left open
    on.exit(dev.off(), add = TRUE)
    print(p)
    invisible(NULL)
}

In [15]:
# canonical Treg interactions, each written as c(ligand, receptor)
treg.interactions <- list(
    c("CTLA4", "CD80"),
    c("CTLA4", "CD86"),
    c("LGALS9", "HAVCR2"),
    c("CD70", "CD27")
)

In [16]:
# target cell types kept for plotting: everything except the excluded populations
all.celltypes <- sort(unique(liana_combined$target))
celltypes.use <- setdiff(all.celltypes, c("Mixed CAR+/CAR- Tfh", "NKT", "Plasma", "gdT", "ILC", "MAIT", "Choroid_Plexus_Epithelium"))

# keep the selected targets and the high-dose patients
liana_combined_filtered <- liana_combined %>%
    filter(target %in% celltypes.use, Patient %in% all.high.dose.patients)
liana_combined_filtered$Patient <- factor(liana_combined_filtered$Patient, levels = all.high.dose.patients)

# vectorised cohort label (one ifelse() call instead of one call per row)
liana_combined_filtered$Cohort <- ifelse(
    liana_combined_filtered$Patient %in% discovery.cohort.patients,
    "Discovery",
    "Validation"
)

In [17]:
# Supplemental Figure 7E - Discovery cohort
# Treg as source; Treg -> Treg rows are left out
liana_discovery <- liana_combined_filtered %>%
    filter(Patient %in% discovery.cohort.patients, target != "Treg")
plot.canonical.interactions(
    liana_df = liana_discovery,
    filepath = paste0(figures.dir, "Supp 7E immune checkpoints Treg as source discovery.pdf"),
    cell.types = c("Treg"),
    plot.width = 40
)


In [18]:
# Supplemental Figure 7E - Validation cohort
# Treg as source; Treg -> Treg rows are left out
liana_combined_filtered_validation <- liana_combined_filtered %>%
    filter(Patient %notin% discovery.cohort.patients, target != "Treg")
# restrict the timepoint levels to D0 and D7 for this cohort
liana_combined_filtered_validation$Day <- factor(
    liana_combined_filtered_validation$Day,
    levels = c("D0", "D7")
)
plot.canonical.interactions(
    liana_df = liana_combined_filtered_validation,
    filepath = paste0(figures.dir, "Supp 7E immune checkpoints Treg as source validation.pdf"),
    cell.types = c("Treg"),
    plot.width = 20
)


In [19]:
# Figure 6G
# Day 7 Treg-as-source interactions averaged over patients; Treg -> Treg rows are left out
liana_non_treg_targets <- liana_combined_filtered %>% filter(target != "Treg")
plot.canonical.interactions.patients.aggregated(
    liana_df = liana_non_treg_targets,
    filepath = paste0(figures.dir, "6G Day 7 Treg interactions Treg as source aggregated.pdf"),
    cell.types = c("Treg"),
    timepoint.of.interest = "D7",
    interactions.use = treg.interactions,
    levels.celltypes = celltypes.use,
    plot.width = 10,
    plot.height = 5
)


[1m[22m`summarise()` has grouped output by 'source', 'target', 'ligand.complex',
'receptor.complex'. You can override using the `.groups` argument.
“[1m[22mRemoved 1 row containing missing values or values outside the scale range
(`geom_point()`).”


In [20]:
# Figure 6H
# CTLA4 dot plot across cell types, after removing the excluded populations
csf.high.dose.subset <- subset(
    csf.high.dose,
    subset = cell_type %notin% c("Mixed CAR+/CAR- Tfh", "NKT", "Plasma", "gdT", "ILC", "MAIT", "Choroid_Plexus_Epithelium")
)

p <- DotPlot(csf.high.dose.subset, features = c("CTLA4"), group.by = "cell_type") +
  ggtitle("CTLA4 expression in CSF") +
  xlab("") +
  ylab("") +
  coord_flip() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

pdf(paste0(figures.dir, "6H Treg CTLA4 gene expression.pdf"), width = 10, height = 5)
print(p)
dev.off()

# Plot exhaustion/activation signatures

In [21]:
# fix the patient order (P1..P12) for downstream grouping and plotting
patient.per.cell <- csf.high.dose$Patient
csf.high.dose$Patient <- factor(patient.per.cell, levels = all.high.dose.patients)

In [22]:
# load the signature table and take the effector marker column
wherry_signatures <- read.csv(paste0(root.dir, "Resources/", "wherry_pace_signatures.csv"))
effector.signature <- wherry_signatures[["Pace_effector_markers"]]
# drop empty-string entries from the column
effector.signature <- effector.signature[nzchar(effector.signature)]

In [23]:
# score each cell for the effector signature
gene.signatures <- list()
gene.signatures[["Effector signature"]] <- effector.signature
# `features` is spelled out in full (the previous `feature=` only worked through
# partial argument matching); search = TRUE asks Seurat to look up updated
# symbols for features missing from the object
csf.high.dose <- AddModuleScore(csf.high.dose, features = gene.signatures, name = names(gene.signatures), search = TRUE)
# the score column is created as "Effector signature1"; strip the numeric suffix
csf.high.dose@meta.data <- csf.high.dose@meta.data %>% rename(`Effector signature` = `Effector signature1`)

“[1m[22mThe `slot` argument of `GetAssayData()` is deprecated as of SeuratObject 5.0.0.
[36mℹ[39m Please use the `layer` argument instead.
[36mℹ[39m The deprecated feature was likely used in the [34mSeurat[39m package.
  Please report the issue at [3m[34m<https://github.com/satijalab/seurat/issues>[39m[23m.”
“The following features are not present in the object: CCL9, KLRA9, ATPIF1, attempting to find updated synonyms”
Found updated symbols for 1 symbols

ATPIF1 -> ATP5IF1

“The following features are still not present in the object: CCL9, KLRA9”


In [24]:
# copy per-cell values of selected genes into the metadata table
marker.genes <- c("HLA-DRA", "ENTPD1", "CTLA4", "PDCD1", "LAG3")
data <- FetchData(object = csf.high.dose, vars = marker.genes)
csf.high.dose <- AddMetaData(object = csf.high.dose, metadata = data)

In [25]:
# Figure 6J
# Per-sample mean of each marker in one cell type at one timepoint, compared
# between responders and non-responders with a Wilcoxon test.
genes.of.interest <- c("HLA-DRA", "ENTPD1", "CTLA4",  "Effector signature", "PDCD1", "LAG3")

celltype <- "CD8+ T"
day <- "D7"
timepoint.var <- "Day"
seurat.obj <- csf.high.dose

# Pass 1: per-sample means and the raw test for every marker.
# The marker values are read straight from the metadata columns, so the Seurat
# object is not modified (and copied) once per marker.
gene.dfs <- list()
stat.tests <- list()
for (gene.of.interest in genes.of.interest) {

    gene.dfs[[gene.of.interest]] <- seurat.obj@meta.data %>%
        filter(cell_type == celltype, .data[[timepoint.var]] == day) %>%
        group_by(cell_type, sample_id, Patient, response.status, Cohort) %>%
        summarize(mean.expression = mean(.data[[gene.of.interest]]),
                  count = n(),
                  .groups = "drop")

    stat.tests[[gene.of.interest]] <- gene.dfs[[gene.of.interest]] %>%
        wilcox_test(mean.expression ~ response.status) %>%
        add_y_position()
}

# FDR adjustment across all markers. Adjusting inside the loop acted on a single
# p-value at a time, which leaves it unchanged, so the plotted "p.adj" was the raw p.
raw.p <- vapply(stat.tests, function(st) st$p[1], numeric(1))
adj.p <- p.adjust(raw.p, method = "fdr")

# Pass 2: one panel per marker, annotated with the adjusted p-value
plot.list <- list()
for (gene.of.interest in genes.of.interest) {

    gene.df <- gene.dfs[[gene.of.interest]]

    stat.test <- stat.tests[[gene.of.interest]]
    stat.test$p.adj <- signif(adj.p[[gene.of.interest]], 3) # 3 significant digits for the label
    stat.test <- stat.test %>% add_significance("p.adj")

    p <- ggplot(gene.df, aes(response.status, mean.expression)) +
        geom_boxplot(aes(fill = response.status), outlier.shape = NA) + 
        geom_point(aes(shape = Cohort), stroke = 1, size = 3, fill = "white", color = "black", position = position_jitter(width=0.2)) + 
        scale_shape_manual(breaks = c("Discovery", "Validation"), values = c(19, 22)) + 
        scale_fill_manual(breaks = c("Non-responder", "Responder"), values = c("#A0412D", "#5FA7A2")) +
        theme_classic() +
        ylab("Expression level") +
        ggtitle(gene.of.interest) +
        stat_pvalue_manual(stat.test, label = "p.adj", tip.length = 0) +
        xlab("Response status") +
        guides(fill = "none")

    plot.list[[gene.of.interest]] <- p
}

p <- plot_grid(plotlist = plot.list)
pdf(paste0(figures.dir, "6J CD8+ T Day 7 marker expression.pdf"), width=12, height=9)
print(p)
dev.off()

[1m[22m`summarise()` has grouped output by 'cell_type', 'sample_id', 'Patient',
'response.status'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'cell_type', 'sample_id', 'Patient',
'response.status'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'cell_type', 'sample_id', 'Patient',
'response.status'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'cell_type', 'sample_id', 'Patient',
'response.status'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'cell_type', 'sample_id', 'Patient',
'response.status'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'cell_type', 'sample_id', 'Patient',
'response.status'. You can override using the `.groups` argument.


In [26]:
# print the R version, platform and attached package versions for this run
sessionInfo() 

R version 4.3.2 (2023-10-31)
Platform: aarch64-unknown-linux-gnu (64-bit)
Running under: Ubuntu 22.04.3 LTS

Matrix products: default
BLAS:   /usr/lib/aarch64-linux-gnu/openblas-pthread/libblas.so.3 
LAPACK: /usr/lib/aarch64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so;  LAPACK version 3.10.0

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

time zone: Etc/UTC
tzcode source: system (glibc)

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] rstatix_0.7.2      cowplot_1.1.3      ggpubr_0.6.0       ggplot2_3.5.2     
 [5] tidyr_1.3.1        dplyr_1.1.4        liana_0.1.14       Seurat_5.3.0      
