In [58]:
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(data.table))
suppressPackageStartupMessages(require(RColorBrewer))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(purrr))
suppressPackageStartupMessages(library(circlize))

# Parameters

In [59]:
# Input
# Project root; every input file below is resolved relative to it.
base_path <- "/scratch_isilon/groups/singlecell/shared/projects/Inflammation-PBMCs-Atlas"
corrected_logFC_inputpath <- file.path(
  base_path, "03_downstream_analysis", "06_inflammation_signatures", "results",
  "DecoupleR_ulmestimates_mixedmlEvalFilt_L1_Corr.csv"
)
corrected_logFC_NonFilt_inputpath <- file.path(
  base_path, "03_downstream_analysis", "06_inflammation_signatures", "results",
  "DecoupleR_ulmestimates_mixedmlEval_L1_Corr.csv"
)
disease_metadata_inputpath <- file.path(
  base_path, "03_downstream_analysis", "06_inflammation_signatures", "results",
  "disease_metadata.csv"
)

# Arguments
# Adjusted p-value threshold.
filter_by_pval <- 0.05

# Figure geometry / typography settings.
width <- 10
height <- 15
resol <- 400
cellheight <- 15
cellwidth <- 20
fontsize <- 12

# Non filtered

## Load

In [60]:
# Load -------------------------
# Disease metadata: the first CSV column is used as row names.
metadata <- read.csv(file = disease_metadata_inputpath, row.names = 1)
# Non-filtered coefficient table; read.csv treats the first line as header.
correctedNoFilt_logFC <- read.csv(file = corrected_logFC_NonFilt_inputpath)

In [61]:
# Preview the first six rows of the non-filtered coefficient table.
print(head(correctedNoFilt_logFC, n = 6L))

                                                    X  Coef. Std.Err.      z
1  C(disease, Treatment(reference="healthy"))[T.BRCA]  0.221    0.801  0.276
2    C(disease, Treatment(reference="healthy"))[T.CD] -0.670    0.270 -2.484
3  C(disease, Treatment(reference="healthy"))[T.COPD]  0.526    0.384  1.371
4 C(disease, Treatment(reference="healthy"))[T.COVID] -0.113    0.212 -0.533
5   C(disease, Treatment(reference="healthy"))[T.CRC] -0.172    0.741 -0.232
6   C(disease, Treatment(reference="healthy"))[T.HBV] -0.406    0.498 -0.814
  P..z. X.0.025 X0.975. AnnotationLevel CellType
1 0.783  -1.349   1.790          Level1        B
2 0.013  -1.199  -0.141          Level1        B
3 0.170  -0.226   1.278          Level1        B
4 0.594  -0.529   0.303          Level1        B
5 0.817  -1.624   1.280          Level1        B
6 0.415  -1.383   0.571          Level1        B
                                       FactorName disease  Pval_adj
1 10-X-B-X-cytokine_and_receptors_proinflammatory 

In [62]:
# Diseases present in the coefficient table.
unique(correctedNoFilt_logFC[["disease"]])

In [63]:
# Number of distinct diseases in the coefficient table.
length(unique(correctedNoFilt_logFC[["disease"]]))

In [64]:
# Number of distinct factors (FactorName values) in the coefficient table.
length(unique(correctedNoFilt_logFC[["FactorName"]]))

In [65]:
#metadata <- metadata %>% filter(disease != "BRCA")
# Show the disease / diseaseGroup table that was read from
# disease_metadata_inputpath.
print(metadata)

     disease         diseaseGroup
0       BRCA          solid_tumor
1         CD                IMIDs
2       COPD chronic_inflammation
3      COVID            infection
4        CRC          solid_tumor
5        HBV            infection
6        HIV            infection
7      HNSCC          solid_tumor
8         MS                IMIDs
9        NPC          solid_tumor
10        PS                IMIDs
11       PSA                IMIDs
12        RA                IMIDs
13       SLE                IMIDs
14        UC                IMIDs
15    asthma chronic_inflammation
16 cirrhosis chronic_inflammation
17       flu            infection
18   healthy              healthy
19    sepsis   acute_inflammation


In [66]:
# Signature names used when relabelling FactorName: a factor whose third
# "-X-" field is "c" followed by one of these names has the leading "c" removed.
signatures <- c(
  "cytokine_and_receptors_proinflammatory",
  "adhesion_molecules",
  "antigen_presentation_molecules",
  "CD8T_exhaustion",
  "CD8T_tcr_activation",
  "chemokines",
  "chemokine_receptors",
  #'cytokine_and_receptors__antiinflammatory',
  "cytokine_andreceptors_antiinflammatory",
  "IFN_Type_1_2_Lambda",
  "IFN_response",
  "TNF_receptors_ligands",
  "antigen-crosspresentation",
  "IFNG_response",
  "IL4-IL13_response",
  "CD4T_TH17_UP",
  "CD4T_TH2_UP",
  "CD4T_TFH_UP",
  "CD4T_TH1_UP",
  "Tregs_FoxP3_stabilization",
  "effector"
)

## Preprocessing

**Extract information from input DF**

In [67]:
# Preprocess data
# Filter rows based on Pval_adj
#filtered_data <- corrected_logFC %>%
#  mutate(Coef = ifelse(Pval_adj >= filter_by_pval, 0, Coef.))


# Extract Factor function and Celltype
# FactorName looks like "<id>-X-<cell type>-X-<function>"; it is split once and
# the 3rd field becomes Function while the 2nd overwrites CellType.
filtered_data <- local({
  annotated <- correctedNoFilt_logFC
  name_parts <- strsplit(as.character(annotated$FactorName), "-X-")
  annotated$Function <- sapply(name_parts, `[`, 3)
  annotated$CellType <- sapply(name_parts, `[`, 2)
  annotated
})

In [68]:
# Preview the table with the derived Function / CellType columns.
head(filtered_data, n = 6L)

Unnamed: 0_level_0,X,Coef.,Std.Err.,z,P..z.,X.0.025,X0.975.,AnnotationLevel,CellType,FactorName,disease,Pval_adj,Function
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>
1,"C(disease, Treatment(reference=""healthy""))[T.BRCA]",0.221,0.801,0.276,0.783,-1.349,1.79,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,BRCA,0.9026079,cytokine_and_receptors_proinflammatory
2,"C(disease, Treatment(reference=""healthy""))[T.CD]",-0.67,0.27,-2.484,0.013,-1.199,-0.141,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,CD,0.0549,cytokine_and_receptors_proinflammatory
3,"C(disease, Treatment(reference=""healthy""))[T.COPD]",0.526,0.384,1.371,0.17,-0.226,1.278,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,COPD,0.3688933,cytokine_and_receptors_proinflammatory
4,"C(disease, Treatment(reference=""healthy""))[T.COVID]",-0.113,0.212,-0.533,0.594,-0.529,0.303,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,COVID,0.7796916,cytokine_and_receptors_proinflammatory
5,"C(disease, Treatment(reference=""healthy""))[T.CRC]",-0.172,0.741,-0.232,0.817,-1.624,1.28,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,CRC,0.9205398,cytokine_and_receptors_proinflammatory
6,"C(disease, Treatment(reference=""healthy""))[T.HBV]",-0.406,0.498,-0.814,0.415,-1.383,0.571,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,HBV,0.6337552,cytokine_and_receptors_proinflammatory


In [69]:
# Prepare data for heatmap
# Wide layout: one row per FactorName, one column per disease, each cell
# holding the coefficient (Coef.) of that factor/disease pair.
heatmap_data <- spread(
  filtered_data[, c("FactorName", "disease", "Coef.")],
  key = disease,
  value = Coef.
)
head(heatmap_data, n = 6L)

Unnamed: 0_level_0,FactorName,asthma,BRCA,CD,cirrhosis,COPD,COVID,CRC,flu,HBV,HIV,HNSCC,MS,NPC,PS,PSA,RA,sepsis,SLE,UC
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,10-X-B-X-cytokine_and_receptors_proinflammatory,-0.037,0.221,-0.67,1.061,0.526,-0.113,-0.172,-0.666,-0.406,-1.566,0.124,0.801,-0.857,-1.339,-1.041,-0.871,-0.246,0.097,-1.407
2,100-X-T_CD8_Naive-X-adhesion_molecules,0.431,2.767,0.387,2.407,-0.495,-0.799,0.191,-0.769,1.169,-0.563,-0.202,0.835,0.336,1.082,1.292,1.586,-0.95,-0.022,0.995
3,101-X-T_CD8_Naive-X-antigen_presentation_molecules,-0.42,2.557,1.623,5.423,0.758,-0.212,3.445,-0.265,1.116,-1.377,0.545,0.451,-1.537,2.285,2.201,2.448,-0.842,1.448,2.992
4,103-X-T_CD8_NonNaive-X-CD8T_exhaustion,1.085,1.103,0.675,0.575,0.317,-0.099,-1.161,0.21,-0.055,-0.787,0.147,0.034,1.043,0.304,0.714,1.436,0.105,1.713,1.577
5,104-X-T_CD8_NonNaive-X-CD8T_tcr_activation,0.595,-0.524,0.913,3.016,1.249,0.043,-0.889,0.464,-0.872,-1.351,0.083,-0.067,0.263,0.219,0.608,0.816,0.202,1.65,1.809
6,105-X-T_CD8_NonNaive-X-chemokines,0.706,-0.308,0.603,2.172,1.203,0.119,-0.894,0.674,-2.533,-0.92,-0.064,-1.08,-1.989,-0.695,0.672,0.831,0.145,0.984,0.734


In [70]:
# Rebuild FactorName as "<cell type>-X-<function>", dropping the leading
# numeric id (first "-X-" field).
heatmap_data$FactorName <- sapply(
  strsplit(heatmap_data$FactorName, "-X-"),
  function(parts) {
    cell_type <- parts[2]
    signature_name <- parts[3]

    # A function name written as "c" + <known signature> loses that extra "c".
    without_prefix <- substr(signature_name, 2, nchar(signature_name))
    if (startsWith(signature_name, "c") && without_prefix %in% signatures) {
      signature_name <- without_prefix
    }

    paste0(cell_type, "-X-", signature_name)
  }
)
head(heatmap_data, n = 6L)

Unnamed: 0_level_0,FactorName,asthma,BRCA,CD,cirrhosis,COPD,COVID,CRC,flu,HBV,HIV,HNSCC,MS,NPC,PS,PSA,RA,sepsis,SLE,UC
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,B-X-cytokine_and_receptors_proinflammatory,-0.037,0.221,-0.67,1.061,0.526,-0.113,-0.172,-0.666,-0.406,-1.566,0.124,0.801,-0.857,-1.339,-1.041,-0.871,-0.246,0.097,-1.407
2,T_CD8_Naive-X-adhesion_molecules,0.431,2.767,0.387,2.407,-0.495,-0.799,0.191,-0.769,1.169,-0.563,-0.202,0.835,0.336,1.082,1.292,1.586,-0.95,-0.022,0.995
3,T_CD8_Naive-X-antigen_presentation_molecules,-0.42,2.557,1.623,5.423,0.758,-0.212,3.445,-0.265,1.116,-1.377,0.545,0.451,-1.537,2.285,2.201,2.448,-0.842,1.448,2.992
4,T_CD8_NonNaive-X-CD8T_exhaustion,1.085,1.103,0.675,0.575,0.317,-0.099,-1.161,0.21,-0.055,-0.787,0.147,0.034,1.043,0.304,0.714,1.436,0.105,1.713,1.577
5,T_CD8_NonNaive-X-CD8T_tcr_activation,0.595,-0.524,0.913,3.016,1.249,0.043,-0.889,0.464,-0.872,-1.351,0.083,-0.067,0.263,0.219,0.608,0.816,0.202,1.65,1.809
6,T_CD8_NonNaive-X-chemokines,0.706,-0.308,0.603,2.172,1.203,0.119,-0.894,0.674,-2.533,-0.92,-0.064,-1.08,-1.989,-0.695,0.672,0.831,0.145,0.984,0.734


**Edit function name**

**Generate heatmap DF**

In [71]:
# Coefficient matrix: every column except FactorName (the first one), with
# factors as row names and diseases as column names.
heatmap_matrix <- as.matrix(heatmap_data[, -1])
dimnames(heatmap_matrix) <- list(heatmap_data$FactorName, names(heatmap_data)[-1])
#heatmap_matrix <- t(apply(heatmap_matrix, 1, scale))
#heatmap_matrix[is.na(heatmap_matrix)] <- 0


# Same content as a data frame, which is what the following cells manipulate.
heatmap_df <- as.data.frame(heatmap_matrix)
rownames(heatmap_df) <- heatmap_data$FactorName
colnames(heatmap_df) <- names(heatmap_data)[-1]
head(heatmap_df, n = 6L)

Unnamed: 0_level_0,asthma,BRCA,CD,cirrhosis,COPD,COVID,CRC,flu,HBV,HIV,HNSCC,MS,NPC,PS,PSA,RA,sepsis,SLE,UC
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
B-X-cytokine_and_receptors_proinflammatory,-0.037,0.221,-0.67,1.061,0.526,-0.113,-0.172,-0.666,-0.406,-1.566,0.124,0.801,-0.857,-1.339,-1.041,-0.871,-0.246,0.097,-1.407
T_CD8_Naive-X-adhesion_molecules,0.431,2.767,0.387,2.407,-0.495,-0.799,0.191,-0.769,1.169,-0.563,-0.202,0.835,0.336,1.082,1.292,1.586,-0.95,-0.022,0.995
T_CD8_Naive-X-antigen_presentation_molecules,-0.42,2.557,1.623,5.423,0.758,-0.212,3.445,-0.265,1.116,-1.377,0.545,0.451,-1.537,2.285,2.201,2.448,-0.842,1.448,2.992
T_CD8_NonNaive-X-CD8T_exhaustion,1.085,1.103,0.675,0.575,0.317,-0.099,-1.161,0.21,-0.055,-0.787,0.147,0.034,1.043,0.304,0.714,1.436,0.105,1.713,1.577
T_CD8_NonNaive-X-CD8T_tcr_activation,0.595,-0.524,0.913,3.016,1.249,0.043,-0.889,0.464,-0.872,-1.351,0.083,-0.067,0.263,0.219,0.608,0.816,0.202,1.65,1.809
T_CD8_NonNaive-X-chemokines,0.706,-0.308,0.603,2.172,1.203,0.119,-0.894,0.674,-2.533,-0.92,-0.064,-1.08,-1.989,-0.695,0.672,0.831,0.145,0.984,0.734


**Add missing diseases**

In [72]:
# Disease columns currently present in the heatmap table.
names(heatmap_df)

In [73]:
# Diseases listed in the metadata table.
unique(metadata[["disease"]])

In [74]:
# Every disease listed in the metadata but absent from the heatmap table gets
# an all-NA column, so that it can still be referenced by name.
for (absent_disease in setdiff(unique(metadata$disease), colnames(heatmap_df))) {
  heatmap_df[[absent_disease]] <- NA
}

**Disease order**

In [75]:
# Manual disease ordering; each line is one of the blank-line-separated groups
# of the original listing.
disease_order <- c(
  "RA", "PS", "PSA", "CD", "UC", "SLE", "MS",
  "asthma", "COPD", "cirrhosis",
  "sepsis",
  "HIV", "HBV", "COVID", "flu",
  "BRCA", "NPC", "HNSCC", "CRC"
)

**Celltype order**

In [76]:
# Cell type ordering; it is used as the level order of the CellType row
# annotation.
heatmap_celltypes_order <- c(
  "B", "Plasma",
  "pDC", "DC", "Mono",
  "T_CD4_Naive", "T_CD4_NonNaive", "T_CD8_Naive", "T_CD8_NonNaive",
  "UTC", "ILC"
)

**Functions order**

In [77]:
# Function (signature) ordering; it is used as the level order of the Function
# factor that sorts the heatmap rows.
functions_order <- c(
  "adhesion_molecules",

  "antigen_presentation_molecules",

  "cytokine_and_receptors_proinflammatory",
  #'cytokine_and_receptors__antiinflammatory',
  "cytokine_andreceptors_antiinflammatory",

  "chemokines",
  "chemokine_receptors",

  "IFN_Type_1_2_Lambda",
  "IFN_response",
  "TNF_receptors_ligands",

  "CD4T_TFH_UP",
  "CD4T_TH1_UP",
  "CD4T_TH2_UP",
  "CD4T_TH17_UP",

  "Tregs_FoxP3_stabilization",

  "CD8T_exhaustion",
  "CD8T_tcr_activation",

  "effector",

  "IFNG_response",
  "IL4-IL13_response",

  "antigen-crosspresentation"
)

## Color Annotations

In [114]:
# Disease order used to arrange the column annotation table.
disease_order_dendogram <- c(
  "cirrhosis", "SLE", "UC", "RA", "PSA", "PS", "CD", "HIV", "HBV", "NPC",
  "MS", "CRC", "asthma", "BRCA", "COPD", "HNSCC", "sepsis", "COVID", "flu"
)

In [115]:
# Prepare annotations
# Column annotations
# Non-healthy diseases with their group as a factor ...
col_annotations <- metadata %>%
  arrange(diseaseGroup) %>%
  filter(diseaseGroup != "healthy") %>%
  mutate(diseaseGroup = as.factor(diseaseGroup))
# ... re-ordered to follow disease_order_dendogram and renumbered from 1.
col_annotations <- col_annotations[
  match(disease_order_dendogram, col_annotations$disease),
]
row.names(col_annotations) <- NULL
# Column order applied later to the heatmap and significance tables.
order_col <- col_annotations[["disease"]]
col_annotations

disease,diseaseGroup
<chr>,<fct>
cirrhosis,chronic_inflammation
SLE,IMIDs
UC,IMIDs
RA,IMIDs
PSA,IMIDs
PS,IMIDs
CD,IMIDs
HIV,infection
HBV,infection
NPC,solid_tumor


In [116]:
# Move the disease names into the row names of the annotation table.
col_annotations <- tibble::column_to_rownames(col_annotations, var = "disease")

In [117]:
# Re-add the disease as a factor column whose levels keep the current row order.
col_annotations[["disease"]] <- factor(
  row.names(col_annotations),
  levels = unique(row.names(col_annotations))
)

In [118]:
# FactorName is now "<cell type>-X-<function>": split it once and keep both
# parts as extra columns next to the coefficients.
row_annotDF <- local({
  annotated <- heatmap_data
  name_parts <- strsplit(as.character(annotated$FactorName), "-X-")
  annotated$Function <- sapply(name_parts, `[`, 2)
  annotated$CellType <- sapply(name_parts, `[`, 1)
  annotated
})

In [148]:
# Row annotations
# One row per factor. Function and CellType become factors whose levels follow
# functions_order / heatmap_celltypes_order, so arrange() sorts rows by function
# first and by cell type within each function.
row_annotations <- row_annotDF %>%
  select(FactorName, Function, CellType) %>%
  distinct() %>%
  mutate(
    Function = factor(Function, levels = functions_order),
    CellType = factor(CellType, levels = heatmap_celltypes_order)
  ) %>%
  arrange(Function, CellType)
  #arrange(CellType, Function)

# Row order applied later to the heatmap and significance tables.
order_row <- row_annotations[["FactorName"]]
head(row_annotations, n = 6L)

Unnamed: 0_level_0,FactorName,Function,CellType
Unnamed: 0_level_1,<chr>,<fct>,<fct>
1,B-X-adhesion_molecules,adhesion_molecules,B
2,Plasma-X-adhesion_molecules,adhesion_molecules,Plasma
3,pDC-X-adhesion_molecules,adhesion_molecules,pDC
4,DC-X-adhesion_molecules,adhesion_molecules,DC
5,Mono-X-adhesion_molecules,adhesion_molecules,Mono
6,T_CD4_Naive-X-adhesion_molecules,adhesion_molecules,T_CD4_Naive


In [149]:
# Drop the Function column from the row annotation table.
row_annotations <- select(row_annotations, -Function)

In [150]:
# Number of distinct functions among the heatmap rows. The Function column has
# already been dropped from row_annotations, so it is read from row_annotDF,
# which still carries it.
length(unique(row_annotDF$Function))

### Define Palettes

**Celltypes**

In [151]:
# Colour of every Level-1 cell type, as a named character vector.
l1_palette <- c(
  "B" = "#7bc6d6",
  "Plasma" = "#025566",

  "pDC" = "#a7c957",
  "DC" = "#6a994e",
  "Mono" = "#386641",

  "T_CD4_Naive" = "#fff3b0",
  "T_CD4_NonNaive" = "#e09f3e",
  "T_CD8_Naive" = "#9e2a2b",
  "T_CD8_NonNaive" = "#540b0e",

  "UTC" = "#88657f",
  "ILC" = "#67253a",

  "Cycling_cells" = "#d4a373",
  "Progenitors" = "#ccd5ae",

  "Platelets" = "#808080",  # To remove
  "RBC" = "#000000"         # To remove
)
# Same palette as a named list, plus the cell type names on their own.
annotation_Level1_palette <- as.list(l1_palette)
celltype_names <- names(annotation_Level1_palette)

**Diseases**

In [152]:
# Create the named list
# Colour of every disease (plus healthy), first as a named character vector.
disease_palette <- c(
  "healthy" = "#808080",

  "RA" = "#264653",
  "PS" = "#287271",
  "PSA" = "#2a9d8f",
  "CD" = "#e76f51",
  "UC" = "#e9c46a",
  "SLE" = "#941c2f",
  "MS" = "#8ab17d",

  "asthma" = "#ea698b",
  "COPD" = "#c05299",
  "cirrhosis" = "#973aa8",

  "sepsis" = "#ef233c",

  "HIV" = "#e7ecef",
  "HBV" = "#a3cef1",
  "COVID" = "#6096ba",
  "flu" = "#274c77",

  "BRCA" = "#fff75e",
  "NPC" = "#fdb833",
  "HNSCC" = "#d9981a",
  "CRC" = "#9e7524"
)
diseases_palette <- as.list(disease_palette)

# Generate vectors for keys and values
diseases <- names(diseases_palette)

**DiseaseGroup**

In [153]:
# Colour of every disease group, first as a named character vector.
diseaseGroups_palette <- c(
  "IMIDs" = "#2a9d8f",
  "solid_tumor" = "#e3a52d",
  "chronic_inflammation" = "#ffafcc",
  "acute_inflammation" = "#ef233c",
  "infection" = "#abc4ff",
  "healthy" = "#808080"
)
diseasesGroup_palette <- as.list(diseaseGroups_palette)

# Generate vectors for keys and values
diseaseGroups <- names(diseasesGroup_palette)

**Functions**

In [154]:
# Unnamed colours reserved for the Function annotation (21 entries).
function_palette <- c(
  "#f3c300", "#875692", "#f38400",
  "#a1caf1", "#be0032", "#c2b280",
  "#848482", "#008856", "#e68fac",
  "#0067a5", "#f99379", "#604e97",
  "#f6a600", "#b3446c", "#dcd300",
  "#882d17", "#8db600", "#654522",
  "#e25822", "#2b3d26",
  "#ff69b4" # Added a new color
)


In [155]:
# Number of Function levels, to compare against the size of function_palette.
# row_annotations no longer has a Function column at this point, so the count
# is taken from functions_order, the vector that was used as its factor levels.
length(functions_order)

In [156]:
# Clip values to a range, treating each sign separately.
#
# v    : numeric scalar, vector or matrix (NA allowed).
# minV : lower bound applied to negative entries of `v`.
# maxV : upper bound applied to non-negative entries of `v`.
#
# Returns `v` with non-negative entries capped at `maxV`, negative entries
# floored at `minV` and NA entries left untouched. Shape and dimnames of `v`
# are preserved, so it works element-wise inside apply() as well as on a whole
# vector or matrix at once.
clipValues <- function(v, minV, maxV) {
  clipped <- v
  non_negative <- !is.na(v) & v >= 0
  negative <- !is.na(v) & v < 0
  clipped[non_negative] <- pmin(v[non_negative], maxV)
  clipped[negative] <- pmax(v[negative], minV)
  clipped
}

In [157]:
# COLOR PALETTE

# Manual definition
# clrP <- colorRampPalette(c("#ffffc1", "#6b001d"))(100)
# clrN <- colorRampPalette(c("#0a2258", "#f5faff"))(100)

# Predefined color palette
# 100 colours for positive values (YlOrRd) and 100 for negative values
# (reversed Blues).
clrP <- colorRampPalette(brewer.pal(9, "YlOrRd"))(100)
clrN <- colorRampPalette(rev(brewer.pal(9, "Blues")))(100)

# Full 200-colour scale: negative ramp first, positive ramp second.
clr <- c(clrN, clrP)

In [158]:
# VALUE RANGES (those values are passed to clipValues function)

# NO CONSTRAINTS
# max_value = max(heatmap_df, na.rm = T)
# min_value = min(heatmap_df, na.rm = T)

# CLIPPED WITH QUANTILE
# Upper bound: 99th percentile of the non-negative values.
# Lower bound: 1st percentile of the negative values.
max_value <- quantile(heatmap_df[heatmap_df >= 0], probs = 0.99, na.rm = TRUE)
min_value <- quantile(heatmap_df[heatmap_df < 0], probs = 0.01, na.rm = TRUE)

**Define annotation palette**

In [159]:
# Each palette is restricted to the levels actually present in the annotation
# tables; indexing a named palette by those names keeps the names attached.

# DiseaseGroup
subset_groups <- intersect(diseaseGroups, levels(col_annotations$diseaseGroup))
disease_group_colors <- diseaseGroups_palette[subset_groups]
# Disease
subset_diseases <- intersect(diseases, levels(col_annotations$disease))
disease_colors <- disease_palette[subset_diseases]
# Celltype
subset_celltype <- intersect(celltype_names, levels(row_annotations$CellType))
cell_type_colors <- l1_palette[subset_celltype]
# Function
function_colors <- function_palette

# Create the annotation colors list
# One entry per annotation column.
my_colour_annot <- list(
  diseaseGroup = disease_group_colors,
  disease = disease_colors,
  #Function = setNames(function_colors, levels(row_annotations$Function))
  CellType = cell_type_colors
)

In [160]:
# Display the annotation colour list (diseaseGroup, disease, CellType).
my_colour_annot

## Generate Heatmap

In [161]:
# Number of heatmap rows (order_row holds the FactorName values in row order).
length(order_row)

In [162]:
# Number of heatmap columns (order_col holds the diseases in column order).
length(order_col)

In [163]:
# Size of the coefficient matrix: columns (diseases) first, then rows (factors).
print(dim(heatmap_matrix)[2])
print(dim(heatmap_matrix)[1])

[1] 19
[1] 119


In [164]:
# TRUE when every heatmap row name appears in order_row.
length(setdiff(rownames(heatmap_df), order_row)) == 0

In [165]:
# TRUE when every heatmap column name appears in order_col.
length(setdiff(colnames(heatmap_df), order_col)) == 0

In [166]:
# Put rows and columns of the coefficient table in plotting order.
heatmap_df <- heatmap_df[order_row, order_col]
# Row annotation table aligned with those rows, with FactorName moved into the
# row names.
ordered_row_annotations <- tibble::column_to_rownames(
  row_annotations[match(rownames(heatmap_df), row_annotations$FactorName), ],
  var = "FactorName"
)

**Define scale palette**

In [167]:
# Clip values to a range, treating each sign separately.
#
# v    : numeric scalar, vector or matrix (NA allowed).
# minV : lower bound applied to negative entries of `v`.
# maxV : upper bound applied to non-negative entries of `v`.
#
# Returns `v` with non-negative entries capped at `maxV`, negative entries
# floored at `minV` and NA entries left untouched. Shape and dimnames of `v`
# are preserved, so it works element-wise inside apply() as well as on a whole
# vector or matrix at once.
clipValues <- function(v, minV, maxV) {
  clipped <- v
  non_negative <- !is.na(v) & v >= 0
  negative <- !is.na(v) & v < 0
  clipped[non_negative] <- pmin(v[non_negative], maxV)
  clipped[negative] <- pmax(v[negative], minV)
  clipped
}

In [168]:
# COLOR PALETTE

# Manual definition
# clrP <- colorRampPalette(c("#ffffc1", "#6b001d"))(100)
# clrN <- colorRampPalette(c("#0a2258", "#f5faff"))(100)

# Predefined color palette
# 100 colours for positive values (YlOrRd) and 100 for negative values
# (reversed Blues).
clrP <- colorRampPalette(brewer.pal(9, "YlOrRd"))(100)
clrN <- colorRampPalette(rev(brewer.pal(9, "Blues")))(100)

# Full 200-colour scale: negative ramp first, positive ramp second.
clr <- c(clrN, clrP)

In [169]:
# VALUE RANGES (those values are passed to clipValues function)

# NO CONSTRAINTS
# Raw extremes of the coefficient table.
max_v <- max(heatmap_df, na.rm = TRUE)
min_v <- min(heatmap_df, na.rm = TRUE)

# CLIPPED WITH QUANTILE
# Upper bound: 99th percentile of the non-negative values.
# Lower bound: 1st percentile of the negative values.
max_value <- quantile(heatmap_df[heatmap_df >= 0], probs = 0.99, na.rm = TRUE)
min_value <- quantile(heatmap_df[heatmap_df < 0], probs = 0.01, na.rm = TRUE)

**Plot heatmap**

In [170]:
# Prepare data for heatmap
# Wide layout of the adjusted p-values: one row per FactorName, one column per
# disease.
significance_data <- spread(
  filtered_data[, c("FactorName", "disease", "Pval_adj")],
  key = disease,
  value = Pval_adj
)
head(significance_data, n = 6L)

Unnamed: 0_level_0,FactorName,asthma,BRCA,CD,cirrhosis,COPD,COVID,CRC,flu,HBV,HIV,HNSCC,MS,NPC,PS,PSA,RA,sepsis,SLE,UC
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,10-X-B-X-cytokine_and_receptors_proinflammatory,0.9653206,0.9026079,0.0549,0.35340486,0.36889328,0.7796916,0.9205398,0.1523237,0.63375522,0.1476842,0.8767391,0.2926388,0.5363341,0.0,0.0,0.0,0.49150338,0.644642,0.0
2,100-X-T_CD8_Naive-X-adhesion_molecules,0.4733892,0.3376718,0.31504782,0.41665628,0.39132512,0.0,0.9629743,0.1193478,0.17241322,0.7083413,0.7868021,0.2685394,0.9451549,0.0,0.0,0.0,0.01149738,0.9326683,0.0
3,101-X-T_CD8_Naive-X-antigen_presentation_molecules,0.6844297,0.6872426,0.0,0.30289655,0.39701695,0.7410315,0.5603586,0.8134748,0.4636212,0.5250533,0.5883865,0.7625554,0.8502212,0.0,0.0,0.0,0.19860465,0.0,0.0
4,103-X-T_CD8_NonNaive-X-CD8T_exhaustion,0.1907053,0.590802,0.26853943,0.80682477,0.77722855,0.8913523,0.5477042,0.844521,0.9734951,0.6817695,0.9097877,0.9839221,0.6502791,0.6596915,0.2049974,0.0,0.89314925,0.0,0.0
5,104-X-T_CD8_NonNaive-X-CD8T_tcr_activation,0.4969992,0.8113991,0.06787636,0.02484163,0.07718805,0.9455743,0.6112096,0.5603586,0.45001583,0.3297435,0.9455743,0.965617,0.9257537,0.7405905,0.2460578,0.06157009,0.72142462,0.0,0.0
6,105-X-T_CD8_NonNaive-X-chemokines,0.5582765,0.9331631,0.46029694,0.37172994,0.25438298,0.8931493,0.7581587,0.5218394,0.05844867,0.714716,0.965617,0.5055149,0.4915034,0.3454382,0.3681786,0.19070526,0.87123913,0.0,0.3364472


In [171]:
# Rebuild FactorName as "<cell type>-X-<function>", dropping the leading
# numeric id (first "-X-" field).
significance_data$FactorName <- sapply(
  strsplit(significance_data$FactorName, "-X-"),
  function(parts) {
    cell_type <- parts[2]
    signature_name <- parts[3]

    # A function name written as "c" + <known signature> loses that extra "c".
    without_prefix <- substr(signature_name, 2, nchar(signature_name))
    if (startsWith(signature_name, "c") && without_prefix %in% signatures) {
      signature_name <- without_prefix
    }

    paste0(cell_type, "-X-", signature_name)
  }
)
# Use the rebuilt names as row names so rows can be selected by factor.
significance_data <- tibble::column_to_rownames(significance_data, var = "FactorName")
head(significance_data, n = 6L)

Unnamed: 0_level_0,asthma,BRCA,CD,cirrhosis,COPD,COVID,CRC,flu,HBV,HIV,HNSCC,MS,NPC,PS,PSA,RA,sepsis,SLE,UC
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
B-X-cytokine_and_receptors_proinflammatory,0.9653206,0.9026079,0.0549,0.35340486,0.36889328,0.7796916,0.9205398,0.1523237,0.63375522,0.1476842,0.8767391,0.2926388,0.5363341,0.0,0.0,0.0,0.49150338,0.644642,0.0
T_CD8_Naive-X-adhesion_molecules,0.4733892,0.3376718,0.31504782,0.41665628,0.39132512,0.0,0.9629743,0.1193478,0.17241322,0.7083413,0.7868021,0.2685394,0.9451549,0.0,0.0,0.0,0.01149738,0.9326683,0.0
T_CD8_Naive-X-antigen_presentation_molecules,0.6844297,0.6872426,0.0,0.30289655,0.39701695,0.7410315,0.5603586,0.8134748,0.4636212,0.5250533,0.5883865,0.7625554,0.8502212,0.0,0.0,0.0,0.19860465,0.0,0.0
T_CD8_NonNaive-X-CD8T_exhaustion,0.1907053,0.590802,0.26853943,0.80682477,0.77722855,0.8913523,0.5477042,0.844521,0.9734951,0.6817695,0.9097877,0.9839221,0.6502791,0.6596915,0.2049974,0.0,0.89314925,0.0,0.0
T_CD8_NonNaive-X-CD8T_tcr_activation,0.4969992,0.8113991,0.06787636,0.02484163,0.07718805,0.9455743,0.6112096,0.5603586,0.45001583,0.3297435,0.9455743,0.965617,0.9257537,0.7405905,0.2460578,0.06157009,0.72142462,0.0,0.0
T_CD8_NonNaive-X-chemokines,0.5582765,0.9331631,0.46029694,0.37172994,0.25438298,0.8931493,0.7581587,0.5218394,0.05844867,0.714716,0.965617,0.5055149,0.4915034,0.3454382,0.3681786,0.19070526,0.87123913,0.0,0.3364472


In [172]:
# Put the p-value table in the same row / column order as the heatmap.
significance_data <- significance_data[order_row, order_col]

In [173]:
# Text overlay for the heatmap: a dot where the adjusted p-value is below the
# significance threshold, an empty string otherwise (including missing
# p-values). The threshold comes from the `filter_by_pval` parameter set in the
# Arguments section rather than a second hard-coded 0.05.
sig_to_plot <- significance_data %>%
  #mutate(across(everything(), ~ ifelse(. < 0.05, "·", "")))
  mutate(across(everything(), ~ ifelse(!is.na(.) & . < filter_by_pval, "·", "")))
head(sig_to_plot)

Unnamed: 0_level_0,cirrhosis,SLE,UC,RA,PSA,PS,CD,HIV,HBV,NPC,MS,CRC,asthma,BRCA,COPD,HNSCC,sepsis,COVID,flu
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
B-X-adhesion_molecules,,·,·,·,·,,,,,,,,,,,,·,,
Plasma-X-adhesion_molecules,,,·,·,·,,·,,,,,,,,,,·,·,
pDC-X-adhesion_molecules,,,,·,·,,,,,,,,,,,,,,
DC-X-adhesion_molecules,,·,·,·,·,·,,,,,,,,,,,,,
Mono-X-adhesion_molecules,,·,·,·,·,·,·,,·,,,,,,,,·,·,
T_CD4_Naive-X-adhesion_molecules,,,·,·,·,·,,,,,,,,,,,·,·,·


In [175]:
#options(repr.plot.width = 10, repr.plot.height = 20, repr.plot.res = 100)

# pheatmap expects one more break than colours. `clr` holds 200 colours
# (100 for negative values followed by 100 for positive values), so the breaks
# are 100 points in [min_value, 0) plus 101 points in [0, max_value]:
# 201 in total, with 0 as the boundary between the two colour ramps.
breaks <- c(
  head(seq(min_value, 0, length.out = 101), -1),
  seq(0, max_value, length.out = 101)
)

# Legend ticks: both clipping bounds plus the round values that fall strictly
# inside them. Ticks outside the break range have no place on the colour bar.
legend_ticks <- c(-4, -2, -1, 0, 1, 2, 4, 6)
legend_ticks <- legend_ticks[legend_ticks > min_value & legend_ticks < max_value]

figure_path <-  paste0(base_path,"/03_downstream_analysis/06_inflammation_signatures/results/heatmap_with_dendogram.pdf")

# The figure is written through a single PDF device. pheatmap only builds the
# plot object (silent = TRUE, no `filename`) and print() draws it once into that
# device; previously a second device was opened on the same file by pheatmap.
# dev.off() runs even if plotting fails, so the device is never left open.
pdf(figure_path, width = 10, height = 13)
tryCatch(
  {
    a <- pheatmap(
      # Coefficients clipped to [min_value, max_value] so outliers do not
      # stretch the colour scale.
      mat = apply(as.matrix(heatmap_df), c(1, 2), clipValues,
                  minV = min_value, maxV = max_value),
      border_color = FALSE,
      color = clr,
      breaks = breaks,
      display_numbers = sig_to_plot,
      fontsize_number = 10,
      na_col = "white",
      cluster_cols = TRUE,
      cluster_rows = FALSE,
      cellheight = 5,
      cellwidth = 7,
      cex = 1,
      fontsize = 6,
      annotation_colors = my_colour_annot,
      annotation_col = col_annotations,
      annotation_row = ordered_row_annotations,
      #gaps_col = cumsum(table(col_annotations$diseaseGroup)),
      #gaps_row = cumsum(table(row_annotations$Function)),
      #gaps_col = c(7, 10, 11, 15, 19),
      gaps_row = c(11, 22, 33, 44, 55, 66, 77, 88, 99, 101, 103, 105, 107, 109, 111, 113, 114, 115, 116, 117), # GAP ROW FOR FUNCTIONS
      #gaps_row = c(10, 19, 28, 38, 49, 63, 77, 88, 99, 108), # GAP ROW FOR CELLTYPES w/ global
      #gaps_row = c(10, 19, 28, 39, 51, 65, 79, 90, 101, 110), # GAP ROW FOR CELLTYPES
      legend = TRUE,
      legend_breaks = c(min_value, legend_ticks, max_value),
      #legend_labels = names(my_colors),
      silent = TRUE
    )
    print(a)
  },
  finally = invisible(dev.off())
)