### Cell-cell communication analysis to find differentially expressed or exclusive interactions found between mregDC and cDC subtypes; cDC subtypes as target (receptor on cDC)
Figure 4G, Supp Figure 4D
Updated 02/06/23

- Author: CW
- files needed: cell_communication_analysis/cpdb_subtype/2_significant_interactions_subtyped/all_samples_subtypedall.tsv, clinical_var_analysis/celltype_mapping.csv
- folder output: ccc_mDC_diffexp_analysis_results/cDC_target and target_plots
- Last updated: 04/11/24

In [1]:
#library('Seurat')
library('dplyr')
library('tidyr')
library('ggplot2')
library('Matrix')
library(patchwork)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘Matrix’


The following objects are masked from ‘package:tidyr’:

    expand, pack, unpack




## 0. Load files

### a. subtype-level interactions 

In [4]:
all_interactions_df <- read.csv('/path_to_file/cpdb_subtyped/2_significant_interactions_subtyped/all_samples_subtypedall.tsv', 
                            sep = '\t', row.names = 'X')

In [5]:
head(all_interactions_df)

Unnamed: 0_level_0,interacting_pair,partner_a,partner_b,gene_a,gene_b,receptor_a,receptor_b,secreted,interacting_cells,significant_mean,sample
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>
0,CD70_CD27,simple:P32970,simple:P26842,CD70,CD27,False,True,True,Cycling T|Cycling T,0.742,D18-11807
1,LCK_CD8_receptor,simple:P06239,complex:CD8_receptor,LCK,,False,True,False,Cycling T|Cycling T,0.947,D18-11807
2,CRTAM_CADM1,simple:O95727,simple:Q9BY67,CRTAM,CADM1,False,False,False,Cycling T|Cycling T,0.223,D18-11807
3,LTA_TNFRSF1B,simple:P01374,simple:P20333,LTA,TNFRSF1B,False,True,True,Cycling T|Cycling T,0.73,D18-11807
4,CCL5_CCR5,simple:P13501,simple:P51681,CCL5,CCR5,False,True,True,Cycling T|Cycling T,0.857,D18-11807
5,CD40LG_integrin_a5b1_complex,simple:P29965,complex:integrin_a5b1_complex,CD40LG,,False,False,True,Cycling T|Cycling T,0.527,D18-11807


### b. list of interactions

In [4]:
celltype_mapping <- read.csv('/path_to_file/clinical_var_analysis/celltype_mapping.csv', row.names ='X')

In [5]:
celltypes <- unique(celltype_mapping$reannotated)

In [6]:
cDC_subtypes <- c('cDC1', 'cDC2', 'mDC')

### c. compile list of celltypes and corresponding subtypes

In [7]:
B <- c("Naive B", "Transitional B", "Memory B", "Plasma B")
mono_macro <- c("Monocyte classical", "Monocyte non-classical", "Macrophage M1", "Macrophage M2", "Macrophage tumor-associated")
endothelial <- c('Endothelial')
fibroblast <- c("Inflammatory fibroblast", "Myofibroblast")
Other_T <- c("DN NKT", "other T")
pDC <- c('pDC')
NK <- c('NK')
CD8_T <- c("CD8 Tn",  "CD8 early activated Teff", "CD8 HNRNPH1+ Teff", "CD8 GZMK+ Teff", 
           "CD8 Tem", "CD8 Texme","CD8 Tex/HS", "CD8 GNLY+ Tex", "CD8 FTL+ Tex", "CD8 NKT")
CD4_T <- c("CD4 Tn", "CD4 CD69+ T", "CD4 Tm" , "CD4 Tex", "CD4 Treg", "CD4 NKT")
Cycling_T <- c("Cycling T")
Tumor <- c("Tumor - Cell Cycle - G1/S", "Tumor - Cell Cycle - G2/M", "Tumor - Cell Cycle HMG-rich", 
           "Tumor - EMT-III", "Tumor - Hypoxia", "Tumor - Interferon/MHC-II (I)", "Tumor - MYC", "Tumor - Protein maturation",
           "Tumor - Respiration", "Tumor - Secreted I", "Tumor - Skin-pigmentation", "Tumor - Stress", 
           "Tumor - Translation initiation", "Tumor - Unassigned", "Tumor - Unfolded protein response", 
           "Tumor - NA")
Keratinocyte <- c('Keratinocyte')
Mast <- c("Mast")

all_celltypes_list <- list(B, mono_macro, endothelial, fibroblast, Other_T, pDC, NK, CD8_T, CD4_T, Cycling_T,
                         Tumor, Keratinocyte, Mast)
names(all_celltypes_list) <- c('B', 'Monocyte/Macrophage', "Endothelial", "Fibroblast", "Other T",
                               "pDC", "NK", "CD8_T", "CD4_T", "Cycling_T",
                               "Tumor", "Keratinocyte", "Mast")

## 1. functions to find interactions

### a. FUNCTION - Get dataframe of relevant interactions between cDC subtypes and the celltype being tested

In [8]:
#concatenate mDC, cDC1, and cDC2 with the B subtypes 
get_relevant_interactions <- function(subtype_list, celltype) {  #subtype list - the list of subtypes for a particular cell type
    possible_interactions <- c()

    for(a in cDC_subtypes) {
        for(b in subtype_list) {
            possible_interactions <- append(possible_interactions, paste0(a, '|', b))
            possible_interactions <- append(possible_interactions, paste0(b, '|', a))
        }
    }

    #subset interactions from table 
    relevant_interactions_df <- all_interactions_df[all_interactions_df$interacting_cells %in% possible_interactions,]

    #add columns for "source" and "target"
    #assume first gene listed is source, second gene listed is target 
    relevant_interactions_df$source <- unlist(lapply(relevant_interactions_df$interacting_cells, function(x) head(strsplit(x, '[|]')[[1]], 1)))
    relevant_interactions_df$target <- unlist(lapply(relevant_interactions_df$interacting_cells, function(x) tail(strsplit(x, '[|]')[[1]], 1)))   

    #add the receptor genes to the partners (so missing values are filled)
    for(r in 1:length(relevant_interactions_df$gene_a)) {
        if(relevant_interactions_df$gene_a[r]=='') {
           relevant_interactions_df$gene_a[r] = tail(strsplit(relevant_interactions_df$partner_a[r], ':')[[1]], 1)
        }
        if(relevant_interactions_df$gene_b[r]=='') {
           relevant_interactions_df$gene_b[r] = tail(strsplit(relevant_interactions_df$partner_b[r], ':')[[1]], 1)
            }
    }  
    #save relevant interactions 
    return(relevant_interactions_df)

} 

### b. FUNCTION - Find exclusive interactions expressed only in one of the three cDC subtypes 

In [9]:
get_exclusive_interactions_target <- function(interaction_df, subtype) {
    
    interaction_df_subtype <- interaction_df[interaction_df$target == subtype,]
    interaction_df_not_subtype <- interaction_df[interaction_df$target != subtype,]
    
    #find interactions that are exclusive to that subtype
    exclusive_ixns <- interaction_df_subtype[!interaction(interaction_df_subtype[c('interacting_pair', 'source')]) %in% interaction(interaction_df_not_subtype[c('interacting_pair', 'source')]),] %>% 
        group_by(interacting_pair, source) %>% summarize(n = n()) %>% filter(n >= 3) %>%
            select(interacting_pair, source) #present in at least 3 samples 
    
    #find the interaction in the interaction_df 
    exclusive_ixn_df <- interaction_df[interaction(interaction_df[c('interacting_pair', 'source')]) %in% interaction(exclusive_ixns),]
    
    if(dim(exclusive_ixn_df)[1] > 0) {
        exclusive_ixn_df$exc <- 'yes'
    }
    return(exclusive_ixn_df)
}

### c. FUNCTION - Find differentially expressed pathways among cDC subtypes 

In [10]:
#put this into a function to run on all celltypes
find_diff_exp_cDC_target <- function(interaction_df) {
    
    #1. as source
    cDC1_source_interactions <- interaction_df %>% filter(target == 'cDC1')
    cDC2_source_interactions <- interaction_df %>% filter(target == 'cDC2')
    mDC_source_interactions <- interaction_df %>% filter(target == 'mDC')
    
    cDC1_cDC2 <- intersect(cDC1_source_interactions[c('interacting_pair', 'source')], cDC2_source_interactions[c('interacting_pair', 'source')])
    #common to cDC1, cDC2, and mDC 
    cDC1_cDC2_mDC <- intersect(cDC1_cDC2, mDC_source_interactions[c('interacting_pair', 'source')])
    
    #find these in the dataframe
    intersected_ixn <- interaction_df[interaction(interaction_df[c('interacting_pair', 'source')]) %in% interaction(cDC1_cDC2_mDC),]
    
    #summarize by the interacting pair/source/target groupings
    intersected_ixn_summ <- intersected_ixn %>% group_by(interacting_pair, target, source) %>% #groups data into the categories when summarize is called 
    reframe(n = n(), significant_mean = significant_mean)
    
    #get unique target pairs to loop through
    unique_ixn_source_pairs <- intersected_ixn_summ %>% distinct(interacting_pair, source)
    
    cDC1_cDC2_pvals <- c()
    cDC1_mDC_pvals <- c()
    cDC2_mDC_pvals <- c()
    
    #store statistics
    cDC1_med <- c()
    cDC2_med <- c()
    mDC_med <- c()
    
    for(ind in 1:dim(unique_ixn_source_pairs)[1]) {

        #get interacting pair and target 
        interaction_w_tg <- intersected_ixn_summ[interaction(intersected_ixn_summ[c('interacting_pair', 'source')]) == interaction(unique_ixn_source_pairs)[ind],] #%>% select(significant_mean)
        #print(interaction_w_tg)
        cDC1 = interaction_w_tg %>% filter(target == 'cDC1')#%>% select(significant_mean)
        cDC2 = interaction_w_tg %>% filter(target == 'cDC2') #%>% select(significant_mean)
        mDC = interaction_w_tg %>% filter(target == 'mDC') #%>% select(significant_mean)
        
        cDC1_vec <- append(rep(0, 38-length(cDC1$significant_mean)), cDC1$significant_mean)
        cDC2_vec <- append(rep(0, 38-length(cDC2$significant_mean)), cDC2$significant_mean)
        mDC_vec <- append(rep(0, 38-length(mDC$significant_mean)), mDC$significant_mean)
        
        #perform wilcoxon test, store the pvalue and the median as a measure of the direction of the difference 
        cDC1_cDC2_pval <- wilcox.test(cDC1_vec, cDC2_vec, exact = FALSE)$p.value
        cDC1_mDC_pval <- wilcox.test(cDC1_vec, mDC_vec, exact = FALSE)$p.value
        cDC2_mDC_pval <-  wilcox.test(cDC2_vec, mDC_vec, exact = FALSE)$p.value
        
        #median of cDC1, cDC2, and mDC
        cDC1_med <- append(cDC1_med, median(cDC1_vec))
        cDC2_med <- append(cDC2_med, median(cDC2_vec))
        mDC_med <- append(mDC_med, median(mDC_vec))

        cDC1_cDC2_pvals <- append(cDC1_cDC2_pvals, cDC1_cDC2_pval)
        cDC1_mDC_pvals <- append(cDC1_mDC_pvals, cDC1_mDC_pval)
        cDC2_mDC_pvals <- append(cDC2_mDC_pvals, cDC2_mDC_pval)


    }
    rank_sum_stats <- data.frame(cDC1tocDC2.pval = cDC1_cDC2_pvals, 
                                 cDC1tomDC.pval = cDC1_mDC_pvals,
                                 cDC2tomDC.pval = cDC2_mDC_pvals, 
                                 cDC1.median = cDC1_med,
                                 cDC2.median = cDC2_med,
                                 mDC.median = mDC_med )
    
    rank_sum_stats <- cbind(unique_ixn_source_pairs, rank_sum_stats)
    return(rank_sum_stats)
}

### d. FUNCTION: Plot mDC only interactions and mDC differential interactions

In [11]:
plot_diff_exp_new <- function(relevant_interactions_df, mDC_only_ixn_df, subtype_list) {
    
    plots <- list()
    #get mean 
    interactions_by_target <- list()

    max_num_samples <- 0
    max_ave_mean <- 0


    for(t in unique(relevant_interactions_df$target)) {
         #filtered dataframe already passed in 
        #print(s)
        st_interactions_all <- relevant_interactions_df %>%
            filter(target == t) %>% 
            filter(!source %in% cDC_subtypes) %>%
            mutate(interaction = interacting_pair) %>% 
            group_by(source, target, interaction, diff_exp_from) %>% #groups data into the categories when summarize is called 
            summarise(n = n(), sig_mean = mean(significant_mean)) #%>% arrange(desc(sig_mean))

        #add mDC only interaction on the other plots with 0 mean and 0 samples
        if(t != 'mDC') {
            df1 <- data.frame(target = t, n = 0, sig_mean = 0, diff_exp_from = '')
            new_row <- cbind(df1, mDC_only_ixn_df)
            st_interactions_all <- rbind(st_interactions_all, new_row)

        }
        #arrange dataframe (not sure if necessary) and filter by mean
        st_interactions_all$interaction <- factor(st_interactions_all$interaction, 
                                                  levels = unique(relevant_interactions_df$interacting_pair))
        st_interactions_all$source=factor(st_interactions_all$source,levels=rev(subtype_list))




        #find max number of samples and max average mean 
        if(max(st_interactions_all$sig_mean) > max_ave_mean) {
            max_ave_mean <- max(st_interactions_all$sig_mean)
        }

        if(max(st_interactions_all$n) > max_num_samples) {
            max_num_samples <- max(st_interactions_all$n)
        }
        
        st_interactions_all <- st_interactions_all%>% arrange(interaction, diff_exp_from)
 
        interactions_by_target[[t]] <- st_interactions_all

        #st_interactions_all$source=factor(st_interactions_all$source,levels=rev(subtype_list))
    }
    
    

    #plot them 
    for(t in unique(relevant_interactions_df$target)) {
        #View(st_interactions_all)
        #Plot results 
        p <- interactions_by_target[[t]] %>% 
         ggplot(aes(interaction, source, col=sig_mean)) +
         ggtitle(t) +
         geom_point(aes(size = n)) +
         theme_classic()+
        #scale_color_viridis(limits = c(0,max_ave_mean)) + 
        scale_color_gradient(low="blue", high="red", limits = c(0,max_ave_mean), oob = scales::squish)+
                scale_size_continuous(limits = c(1,max_num_samples), breaks = append(1, seq(5, max_num_samples, by=5)))+ #change limits 
          theme(axis.text.x = element_text(angle=45, size =10,  hjust=1, vjust=1), 
                axis.text.y = element_text(size =10), 
                plot.title = element_text(hjust = 0.5))

        plots[[t]] <- p
    
    
    }
    
    
return(list(plots, interactions_by_target))
    
    
}


## 3. test functions 

### can generate plots for all cell types, but for CD4, CD8, and Monocyte/Macrophage, take the top 10 interactions to get plots shown in Extended Data Figure 4G and Supplementary Figure 4D

In [None]:
top_n = 10

In [34]:
dir.create('/path_to_file/ccc_mregDC_diffexp_analysis_results/cDC_target/', recursive = TRUE)

In [40]:
names(all_celltypes_list)

In [35]:
#no interactions for Keratinocyte 
#only mDC exclusive interactions for Mast
for(ct in names(all_celltypes_list)) {
        
    plot_ixns_df <- data.frame(matrix(ncol = 15, nrow = 0))

    ct_file <- gsub('/', '_', ct)
    ct_file <- gsub(' ', '_', ct_file)

    #Function a - get relevant interactions 
    interaction_df <- get_relevant_interactions(all_celltypes_list[[ct]], ct)

    #Function b - get exclusive interactions 
    rank_sum_scores <- find_diff_exp_cDC_target(interaction_df)

    ####OPTIONAL: take top 10 using "head" function, otherwise leave as is####
    mDC_to_both_diffexp <- rank_sum_scores[rank_sum_scores$cDC1tomDC.pval < 0.05 & rank_sum_scores$cDC2tomDC.pval < 0.05,] #add head(..., top_n)
    rank_sum_scores <- rank_sum_scores %>% filter(!(cDC1tomDC.pval < 0.05 & cDC2tomDC.pval < 0.05)) #add head(..., top_n)

    mDC_to_cDC1_diffexp <- rank_sum_scores[rank_sum_scores$cDC1tomDC.pval < 0.05,]
    mDC_to_cDC2_diffexp <- rank_sum_scores[rank_sum_scores$cDC2tomDC.pval < 0.05,]

    ##filter interactions and find back in interaction_df
    ixns_to_plot <- mDC_to_both_diffexp #rbind(, mDC_to_cDC1_diffexp, mDC_to_cDC2_diffexp)
    relevant_ixns <- interaction_df[interaction(interaction_df[c('interacting_pair', 'source')]) %in% 
                                   interaction(ixns_to_plot[c('interacting_pair', 'source')]),]

    #######add annotation to indicate differential significance in dataframe########
    ixns_to_plot$diff_exp_from = ''

    #cDC1 to mDC 
    if(sum(ixns_to_plot$cDC1tomDC.pval < 0.05) > 0) {
        ixns_to_plot[ixns_to_plot$cDC1tomDC.pval < 0.05,]$diff_exp_from <- paste0(ixns_to_plot[ixns_to_plot$cDC1tomDC.pval < 0.05,]$diff_exp_from, 'cDC1')
    }

    #cDC2 to mDC 
    if(sum(ixns_to_plot$cDC2tomDC.pval < 0.05) > 0) {
        ixns_to_plot[ixns_to_plot$cDC2tomDC.pval < 0.05,]$diff_exp_from <- paste0(ixns_to_plot[ixns_to_plot$cDC2tomDC.pval < 0.05,]$diff_exp_from, 'cDC2')
    }
    
    #rename 
    if(dim(ixns_to_plot[ixns_to_plot$diff_exp_from == 'cDC1cDC2', ])[1] > 0) {
        ixns_to_plot[ixns_to_plot$diff_exp_from == 'cDC1cDC2', ]$diff_exp_from = 'both'
    }

    ixns_to_plot <- ixns_to_plot %>% arrange(desc(diff_exp_from), desc(mDC.median))

    relevant_ixns$diff_exp_from = ''

    for(ip in interaction(ixns_to_plot[c('interacting_pair', 'source')])) {
        #find the interacting pair and target in relevant_ixns
        sign = ixns_to_plot[interaction(ixns_to_plot[c('interacting_pair', 'source')])== ip,]$diff_exp_from
        relevant_ixns[interaction(relevant_ixns[c('interacting_pair', 'source')]) == ip,]$diff_exp_from <- sign
    }

    relevant_ixns$exc = 'no'

    #Function b - get exclusive mDC interactions 
    mDC_only_ixns <- get_exclusive_interactions_target(interaction_df, 'mDC') #append this to differentially expressed interactions
    if(dim(mDC_only_ixns)[1] > 0) {
        plot_ixns_df <- mDC_only_ixns
        plot_ixns_df$diff_exp_from = ''
    }

    #add differentially expressed interactions to plot_ixns_df 
    plot_ixns_df <- rbind(relevant_ixns, plot_ixns_df)

    #arrange by most differentially expressed 
    plot_ixns_df$diff_exp_from <- factor(plot_ixns_df$diff_exp_from, levels = c('both', '')) # 'cDC1', 'cDC2', #%>% arrange(desc(ixn_sign))

    #
    plot_ixns_df <- plot_ixns_df %>% arrange(diff_exp_from)

    ##for later: add these unique mDC interactions on the other plots with 0 mean and 0 samples
    mDC_only_ixn_df <- unique(mDC_only_ixns[c('source', 'interacting_pair')])
    names(mDC_only_ixn_df)[names(mDC_only_ixn_df) == 'interacting_pair'] <- 'interaction'
    
    
    ####Plot interactions####
    ct_res <- plot_diff_exp_new(plot_ixns_df, mDC_only_ixn_df, all_celltypes_list[[ct]])
    ct_plots <- ct_res[[1]]
    ct_dataframes <- ct_res[[2]]
    
    #save plots
    options(repr.plot.width = 15, repr.plot.height = 15)
    ct_plots[['mDC']]/ct_plots[['cDC1']]/ct_plots[['cDC2']]

    ggsave(paste0('/path_to_file/ccc_mregDC_diffexp_analysis_results/cDC_target/', ct_file, '_source.pdf'), 
          width = 16, height = 12)
    
    #save dataframe that the plot was generated from
    concat_ct_dataframes <- cbind(ct_dataframes[['mDC']], ct_dataframes[['cDC1']][c('n', 'sig_mean')], ct_dataframes[['cDC2']][c('n', 'sig_mean')])

    names(concat_ct_dataframes)[names(concat_ct_dataframes) %in% c('n...5', 'sig_mean...6', 'n...7', 'sig_mean...8', 
                                                    'n...9', 'sig_mean...10')] <- c('mDC_n', 'mDC_sig_mean', 'cDC1_n', 
                                                                                   'cDC1_sig_mean', 'cDC2_n', 
                                                                      'cDC2_sig_mean')

    write.csv(concat_ct_dataframes, paste0('/path_to_file/ccc_mregDC_diffexp_analysis_results/cDC_target/', ct_file, '_source.csv'))
}


ERROR: Error in `$<-.data.frame`(`*tmp*`, "diff_exp_from", value = ""): replacement has 1 row, data has 0


In [2]:
sessionInfo()

R version 4.2.3 (2023-03-15)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: CentOS Linux 7 (Core)

Matrix products: default
BLAS/LAPACK: /net/bmc-lab5/data/kellis/users/cbw3/conda/envs/r-kernel/lib/libopenblasp-r0.3.21.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] patchwork_1.2.0 Matrix_1.6-5    ggplot2_3.5.0   tidyr_1.3.1    
[5] dplyr_1.1.4    

loaded via a namespace (and not attached):
 [1] pillar_1.9.0     compiler_4.2.3   base64enc_0.1-3  tools_4.2.3     
 [5] digest_0.6.35    uuid_1.2-0       lattice_0.22-6   jsonlite_1.8.8  
 [