In [1]:
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(data.table))
suppressPackageStartupMessages(require(RColorBrewer))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(purrr))
suppressPackageStartupMessages(library(circlize))

# Parameters

In [2]:
# Input
base_path <- "/scratch_isilon/groups/singlecell/shared/projects/Inflammation-PBMCs-Atlas"
# Both input tables sit in the same results directory below base_path.
UnCorrected_logFC_NonFilt_inputpath <- file.path(
  base_path, "03_downstream_analysis", "06_inflammation_signatures", "results",
  "DecoupleR_ulmestimates_mixedmlEval_L1_UnCorr.csv"
)
disease_metadata_inputpath <- file.path(
  base_path, "03_downstream_analysis", "06_inflammation_signatures", "results",
  "disease_metadata.csv"
)

# Arguments
# Adjusted p-value threshold
filter_by_pval <- 0.05

# Figure geometry and text size
width <- 10
height <- 15
resol <- 400
cellheight <- 15
cellwidth <- 20
fontsize <- 12

# UnCorrected

## Load

In [3]:
# Load -------------------------
# Coefficient table as-is; for the metadata the first CSV column becomes the
# row names.
UnCorrectedNoFilt_logFC <- read.csv(file = UnCorrected_logFC_NonFilt_inputpath, header = TRUE)
metadata <- read.csv(file = disease_metadata_inputpath, row.names = 1, header = TRUE)

In [4]:
print(head(UnCorrectedNoFilt_logFC))

                                                    X  Coef. Std.Err.      z
1  C(disease, Treatment(reference="healthy"))[T.BRCA]  0.257    0.791  0.325
2    C(disease, Treatment(reference="healthy"))[T.CD] -0.248    0.243 -1.020
3  C(disease, Treatment(reference="healthy"))[T.COPD]  0.569    0.354  1.607
4 C(disease, Treatment(reference="healthy"))[T.COVID] -0.345    0.191 -1.809
5   C(disease, Treatment(reference="healthy"))[T.CRC] -0.386    0.741 -0.522
6   C(disease, Treatment(reference="healthy"))[T.HBV]  0.150    0.511  0.294
  P..z. X.0.025 X0.975. AnnotationLevel CellType
1 0.745  -1.292   1.807          Level1        B
2 0.308  -0.726   0.229          Level1        B
3 0.108  -0.125   1.262          Level1        B
4 0.070  -0.718   0.029          Level1        B
5 0.602  -1.838   1.065          Level1        B
6 0.769  -0.851   1.152          Level1        B
                                       FactorName disease  Pval_adj
1 10-X-B-X-cytokine_and_receptors_proinflammatory 

In [5]:
unique(UnCorrectedNoFilt_logFC$disease)

In [6]:
length(unique(UnCorrectedNoFilt_logFC$disease))

In [7]:
length(unique(UnCorrectedNoFilt_logFC$FactorName))

In [8]:
#metadata <- metadata %>% filter(disease != "BRCA")
print(metadata)

     disease         diseaseGroup
0       BRCA          solid_tumor
1         CD                IMIDs
2       COPD chronic_inflammation
3      COVID            infection
4        CRC          solid_tumor
5        HBV            infection
6        HIV            infection
7      HNSCC          solid_tumor
8         MS                IMIDs
9        NPC          solid_tumor
10        PS                IMIDs
11       PSA                IMIDs
12        RA                IMIDs
13       SLE                IMIDs
14        UC                IMIDs
15    asthma chronic_inflammation
16 cirrhosis chronic_inflammation
17       flu            infection
18   healthy              healthy
19    sepsis   acute_inflammation


In [9]:
# Signature (function) names used to recognise the last component of a
# FactorName.
signatures <- c(
  "cytokine_and_receptors_proinflammatory",
  "adhesion_molecules",
  "antigen_presentation_molecules",
  "CD8T_exhaustion",
  "CD8T_tcr_activation",
  "chemokines",
  "chemokine_receptors",
  # "cytokine_and_receptors__antiinflammatory",
  "cytokine_andreceptors_antiinflammatory",
  "IFN_Type_1_2_Lambda",
  "IFN_response",
  "TNF_receptors_ligands",
  "antigen-crosspresentation",
  "IFNG_response",
  "IL4-IL13_response",
  "CD4T_TH17_UP",
  "CD4T_TH2_UP",
  "CD4T_TFH_UP",
  "CD4T_TH1_UP",
  "Tregs_FoxP3_stabilization",
  "effector"
)

## Preprocessing

**Extract information from input DF**

In [10]:
# Preprocess data
# Filter rows based on Pval_adj
#filtered_data <- UnCorrected_logFC %>%
#  mutate(Coef = ifelse(Pval_adj >= filter_by_pval, 0, Coef.))


# Extract Factor function and Celltype
# FactorName is split on "-X-"; the 3rd piece becomes Function and the 2nd
# piece overwrites CellType. vapply() pins the result to one string per row
# (NA when a piece is missing), so the column type cannot silently change the
# way it can with sapply() on empty or ragged input.
filtered_data <- UnCorrectedNoFilt_logFC %>%
  mutate(
    Function = vapply(strsplit(as.character(FactorName), "-X-"), `[`,
                      FUN.VALUE = character(1), 3),
    CellType = vapply(strsplit(as.character(FactorName), "-X-"), `[`,
                      FUN.VALUE = character(1), 2)
  )

In [11]:
head(filtered_data)

Unnamed: 0_level_0,X,Coef.,Std.Err.,z,P..z.,X.0.025,X0.975.,AnnotationLevel,CellType,FactorName,disease,Pval_adj,Function
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>
1,"C(disease, Treatment(reference=""healthy""))[T.BRCA]",0.257,0.791,0.325,0.745,-1.292,1.807,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,BRCA,0.8711502,cytokine_and_receptors_proinflammatory
2,"C(disease, Treatment(reference=""healthy""))[T.CD]",-0.248,0.243,-1.02,0.308,-0.726,0.229,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,CD,0.5613012,cytokine_and_receptors_proinflammatory
3,"C(disease, Treatment(reference=""healthy""))[T.COPD]",0.569,0.354,1.607,0.108,-0.125,1.262,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,COPD,0.296831,cytokine_and_receptors_proinflammatory
4,"C(disease, Treatment(reference=""healthy""))[T.COVID]",-0.345,0.191,-1.809,0.07,-0.718,0.029,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,COVID,0.2214986,cytokine_and_receptors_proinflammatory
5,"C(disease, Treatment(reference=""healthy""))[T.CRC]",-0.386,0.741,-0.522,0.602,-1.838,1.065,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,CRC,0.7911382,cytokine_and_receptors_proinflammatory
6,"C(disease, Treatment(reference=""healthy""))[T.HBV]",0.15,0.511,0.294,0.769,-0.851,1.152,Level1,B,10-X-B-X-cytokine_and_receptors_proinflammatory,HBV,0.8843724,cytokine_and_receptors_proinflammatory


In [12]:
# Prepare data for heatmap
# Wide table of coefficients: one row per FactorName, one column per disease.
coef_long <- select(filtered_data, FactorName, disease, Coef.)
heatmap_data <- spread(coef_long, key = disease, value = Coef.)
head(heatmap_data)

Unnamed: 0_level_0,FactorName,asthma,BRCA,CD,cirrhosis,COPD,COVID,CRC,flu,HBV,HIV,HNSCC,MS,NPC,PS,PSA,RA,sepsis,SLE,UC
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,10-X-B-X-cytokine_and_receptors_proinflammatory,-0.552,0.257,-0.248,0.761,0.569,-0.345,-0.386,-0.23,0.15,-1.79,0.168,0.46,0.768,-0.865,-0.635,-0.408,-0.19,0.247,-0.864
2,100-X-T_CD8_Naive-X-adhesion_molecules,0.345,2.262,0.144,1.238,-0.217,-1.27,-1.171,-1.573,1.873,-0.24,0.344,0.862,1.56,0.972,1.446,1.424,-1.454,-0.192,0.849
3,101-X-T_CD8_Naive-X-antigen_presentation_molecules,-0.044,3.101,1.242,2.615,0.601,-0.89,2.429,-2.373,1.559,-2.34,0.923,0.058,-1.365,1.396,1.854,2.042,-1.418,0.707,1.966
4,103-X-T_CD8_NonNaive-X-CD8T_exhaustion,1.011,0.466,0.297,0.655,0.857,-0.117,-1.854,0.203,-0.686,0.833,-0.195,-0.054,0.32,0.259,0.509,0.856,0.152,1.021,0.789
5,104-X-T_CD8_NonNaive-X-CD8T_tcr_activation,0.139,-0.774,0.387,2.066,0.95,0.036,-1.432,-0.04,-1.05,-0.867,0.183,-0.262,-0.542,-0.068,0.448,0.53,-0.042,0.949,1.134
6,105-X-T_CD8_NonNaive-X-chemokines,0.274,-0.738,0.743,1.366,0.57,0.074,-1.23,0.182,-3.385,-0.946,-0.46,-1.247,-2.861,-0.384,0.594,0.947,-0.077,0.776,0.627


In [13]:
# Rebuild every FactorName as "<2nd piece>-X-<3rd piece>", i.e. drop the
# leading piece in front of the first "-X-".
heatmap_data$FactorName <- sapply(
  strsplit(heatmap_data$FactorName, "-X-"),
  function(pieces) {
    cell_type <- pieces[2]
    signature <- pieces[3]
    tail_of_signature <- substr(signature, 2, nchar(signature))

    # A stray leading "c" is removed only when what remains is a known signature
    if (startsWith(signature, "c") && tail_of_signature %in% signatures) {
      signature <- tail_of_signature
    }

    paste0(cell_type, "-X-", signature)
  }
)
head(heatmap_data)

Unnamed: 0_level_0,FactorName,asthma,BRCA,CD,cirrhosis,COPD,COVID,CRC,flu,HBV,HIV,HNSCC,MS,NPC,PS,PSA,RA,sepsis,SLE,UC
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,B-X-cytokine_and_receptors_proinflammatory,-0.552,0.257,-0.248,0.761,0.569,-0.345,-0.386,-0.23,0.15,-1.79,0.168,0.46,0.768,-0.865,-0.635,-0.408,-0.19,0.247,-0.864
2,T_CD8_Naive-X-adhesion_molecules,0.345,2.262,0.144,1.238,-0.217,-1.27,-1.171,-1.573,1.873,-0.24,0.344,0.862,1.56,0.972,1.446,1.424,-1.454,-0.192,0.849
3,T_CD8_Naive-X-antigen_presentation_molecules,-0.044,3.101,1.242,2.615,0.601,-0.89,2.429,-2.373,1.559,-2.34,0.923,0.058,-1.365,1.396,1.854,2.042,-1.418,0.707,1.966
4,T_CD8_NonNaive-X-CD8T_exhaustion,1.011,0.466,0.297,0.655,0.857,-0.117,-1.854,0.203,-0.686,0.833,-0.195,-0.054,0.32,0.259,0.509,0.856,0.152,1.021,0.789
5,T_CD8_NonNaive-X-CD8T_tcr_activation,0.139,-0.774,0.387,2.066,0.95,0.036,-1.432,-0.04,-1.05,-0.867,0.183,-0.262,-0.542,-0.068,0.448,0.53,-0.042,0.949,1.134
6,T_CD8_NonNaive-X-chemokines,0.274,-0.738,0.743,1.366,0.57,0.074,-1.23,0.182,-3.385,-0.946,-0.46,-1.247,-2.861,-0.384,0.594,0.947,-0.077,0.776,0.627


**Edit function name**

**Generate heatmap DF**

In [14]:
# Every column except the first (FactorName) holds the values of one disease.
# A negative index is used instead of 2:length(...), which would count
# backwards (2, 1) if the table only had the FactorName column.
disease_columns <- colnames(heatmap_data)[-1]

# Matrix form: rows are named by FactorName, columns by disease.
# drop = FALSE keeps a data frame even when there is a single disease column.
heatmap_matrix <- as.matrix(heatmap_data[, -1, drop = FALSE])
rownames(heatmap_matrix) <- heatmap_data$FactorName
colnames(heatmap_matrix) <- disease_columns
#heatmap_matrix <- t(apply(heatmap_matrix, 1, scale))
#heatmap_matrix[is.na(heatmap_matrix)] <- 0


# Data-frame form with the same row and column names
heatmap_df <- as.data.frame(heatmap_matrix)
rownames(heatmap_df) <- heatmap_data$FactorName
colnames(heatmap_df) <- disease_columns
head(heatmap_df)

Unnamed: 0_level_0,asthma,BRCA,CD,cirrhosis,COPD,COVID,CRC,flu,HBV,HIV,HNSCC,MS,NPC,PS,PSA,RA,sepsis,SLE,UC
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
B-X-cytokine_and_receptors_proinflammatory,-0.552,0.257,-0.248,0.761,0.569,-0.345,-0.386,-0.23,0.15,-1.79,0.168,0.46,0.768,-0.865,-0.635,-0.408,-0.19,0.247,-0.864
T_CD8_Naive-X-adhesion_molecules,0.345,2.262,0.144,1.238,-0.217,-1.27,-1.171,-1.573,1.873,-0.24,0.344,0.862,1.56,0.972,1.446,1.424,-1.454,-0.192,0.849
T_CD8_Naive-X-antigen_presentation_molecules,-0.044,3.101,1.242,2.615,0.601,-0.89,2.429,-2.373,1.559,-2.34,0.923,0.058,-1.365,1.396,1.854,2.042,-1.418,0.707,1.966
T_CD8_NonNaive-X-CD8T_exhaustion,1.011,0.466,0.297,0.655,0.857,-0.117,-1.854,0.203,-0.686,0.833,-0.195,-0.054,0.32,0.259,0.509,0.856,0.152,1.021,0.789
T_CD8_NonNaive-X-CD8T_tcr_activation,0.139,-0.774,0.387,2.066,0.95,0.036,-1.432,-0.04,-1.05,-0.867,0.183,-0.262,-0.542,-0.068,0.448,0.53,-0.042,0.949,1.134
T_CD8_NonNaive-X-chemokines,0.274,-0.738,0.743,1.366,0.57,0.074,-1.23,0.182,-3.385,-0.946,-0.46,-1.247,-2.861,-0.384,0.594,0.947,-0.077,0.776,0.627


**Add missing diseases**

In [15]:
colnames(heatmap_df)

In [16]:
unique(metadata$disease)

In [17]:
# Append an all-NA column for each disease listed in the metadata that has no
# column in heatmap_df yet.
for (absent_disease in setdiff(unique(metadata$disease), colnames(heatmap_df))) {
  heatmap_df[[absent_disease]] <- NA
}

**Disease order**

In [18]:
# Left-to-right order of the disease columns; each line is one visual group.
disease_order <- c(
  "RA", "PS", "PSA", "CD", "UC", "SLE", "MS",
  "asthma", "COPD", "cirrhosis",
  "sepsis",
  "HIV", "HBV", "COVID", "flu",
  "BRCA", "NPC", "HNSCC", "CRC"
)

**Celltype order**

In [19]:
# Order in which cell types are arranged; used as factor levels for CellType.
heatmap_celltypes_order <- c(
  "B", "Plasma",
  "pDC", "DC", "Mono",
  "T_CD4_Naive", "T_CD4_NonNaive", "T_CD8_Naive", "T_CD8_NonNaive",
  "UTC", "ILC"
)

**Functions order**

In [20]:
# Order in which signatures (functions) are arranged; used as factor levels
# for Function.
functions_order <- c(
  "adhesion_molecules",
  "antigen_presentation_molecules",
  "cytokine_and_receptors_proinflammatory",
  # "cytokine_and_receptors__antiinflammatory",
  "cytokine_andreceptors_antiinflammatory",
  "chemokines",
  "chemokine_receptors",
  "IFN_Type_1_2_Lambda",
  "IFN_response",
  "TNF_receptors_ligands",
  "CD4T_TFH_UP",
  "CD4T_TH1_UP",
  "CD4T_TH2_UP",
  "CD4T_TH17_UP",
  "Tregs_FoxP3_stabilization",
  "CD8T_exhaustion",
  "CD8T_tcr_activation",
  "effector",
  "IFNG_response",
  "IL4-IL13_response",
  "antigen-crosspresentation"
)

## Color Annotations

In [21]:
# Prepare annotations
# Column annotations: metadata without the healthy group, with diseaseGroup
# turned into a factor.
col_annotations <- metadata %>%
  filter(diseaseGroup != "healthy") %>%
  arrange(diseaseGroup) %>%
  mutate(diseaseGroup = as.factor(diseaseGroup))

# Put the rows in the plotting order and reset the row names
col_annotations <- col_annotations[match(disease_order, col_annotations[["disease"]]), ]
row.names(col_annotations) <- NULL
order_col <- col_annotations[["disease"]]
col_annotations

disease,diseaseGroup
<chr>,<fct>
RA,IMIDs
PS,IMIDs
PSA,IMIDs
CD,IMIDs
UC,IMIDs
SLE,IMIDs
MS,IMIDs
asthma,chronic_inflammation
COPD,chronic_inflammation
cirrhosis,chronic_inflammation


In [22]:
# Move the disease column into the row names
col_annotations <- tibble::column_to_rownames(col_annotations, var = "disease")

In [23]:
# Re-add disease as a factor whose levels follow the current row order
col_annotations[["disease"]] <- factor(
  row.names(col_annotations),
  levels = unique(row.names(col_annotations))
)

In [24]:
# Split FactorName on "-X-": the 1st piece is stored as CellType and the 2nd
# as Function. vapply() guarantees one string per row (NA when a piece is
# missing) instead of sapply()'s input-dependent return type.
row_annotDF <- heatmap_data %>%
  mutate(
    Function = vapply(strsplit(as.character(FactorName), "-X-"), `[`,
                      FUN.VALUE = character(1), 2),
    CellType = vapply(strsplit(as.character(FactorName), "-X-"), `[`,
                      FUN.VALUE = character(1), 1)
  )

In [25]:
# Row annotations
# One row per distinct FactorName/Function/CellType combination, with both
# annotation columns as factors in their plotting order, sorted by Function
# and then by CellType.
row_annotations <- row_annotDF %>%
  select(FactorName, Function, CellType) %>%
  distinct() %>%
  mutate(
    Function = factor(Function, levels = functions_order),
    CellType = factor(CellType, levels = heatmap_celltypes_order)
  ) %>%
  arrange(Function, CellType)
  #arrange(CellType, Function)
order_row <- row_annotations[["FactorName"]]
head(row_annotations)

Unnamed: 0_level_0,FactorName,Function,CellType
Unnamed: 0_level_1,<chr>,<fct>,<fct>
1,B-X-adhesion_molecules,adhesion_molecules,B
2,Plasma-X-adhesion_molecules,adhesion_molecules,Plasma
3,pDC-X-adhesion_molecules,adhesion_molecules,pDC
4,DC-X-adhesion_molecules,adhesion_molecules,DC
5,Mono-X-adhesion_molecules,adhesion_molecules,Mono
6,T_CD4_Naive-X-adhesion_molecules,adhesion_molecules,T_CD4_Naive


In [26]:
# Drop the Function column from the row annotations
row_annotations <- select(row_annotations, -Function)

In [27]:
# Number of distinct functions. Read from row_annotDF because the Function
# column has already been removed from row_annotations at this point.
length(unique(row_annotDF$Function))

### Define Palettes

**Celltypes**

In [28]:
# Hex colour per cell type
annotation_Level1_palette <- list(
  B = "#7bc6d6",
  Plasma = "#025566",

  pDC = "#a7c957",
  DC = "#6a994e",
  Mono = "#386641",

  T_CD4_Naive = "#fff3b0",
  T_CD4_NonNaive = "#e09f3e",
  T_CD8_Naive = "#9e2a2b",
  T_CD8_NonNaive = "#540b0e",

  UTC = "#88657f",
  ILC = "#67253a",

  Cycling_cells = "#d4a373",
  Progenitors = "#ccd5ae",

  Platelets = "#808080", # To remove
  RBC = "#000000"        # To remove
)
# Same palette flattened to a named character vector, plus its names
l1_palette <- unlist(annotation_Level1_palette)
celltype_names <- names(annotation_Level1_palette)

**Diseases**

In [29]:
# Hex colour per disease, as a named list
diseases_palette <- list(
  healthy = "#808080",

  RA = "#264653",
  PS = "#287271",
  PSA = "#2a9d8f",
  CD = "#e76f51",
  UC = "#e9c46a",
  SLE = "#941c2f",
  MS = "#8ab17d",

  asthma = "#ea698b",
  COPD = "#c05299",
  cirrhosis = "#973aa8",

  sepsis = "#ef233c",

  HIV = "#e7ecef",
  HBV = "#a3cef1",
  COVID = "#6096ba",
  flu = "#274c77",

  BRCA = "#fff75e",
  NPC = "#fdb833",
  HNSCC = "#d9981a",
  CRC = "#9e7524"
)

# Flattened named vector of colours and the disease names on their own
disease_palette <- unlist(diseases_palette)
diseases <- names(diseases_palette)

**DiseaseGroup**

In [30]:
# Hex colour per disease group, as a named list
diseasesGroup_palette <- list(
  IMIDs = "#2a9d8f",
  solid_tumor = "#e3a52d",
  chronic_inflammation = "#ffafcc",
  acute_inflammation = "#ef233c",
  infection = "#abc4ff",
  healthy = "#808080"
)

# Flattened named vector of colours and the group names on their own
diseaseGroups_palette <- unlist(diseasesGroup_palette)
diseaseGroups <- names(diseasesGroup_palette)

**Functions**

In [31]:
# Unnamed vector of 21 hex colours for the function annotation
function_palette <- c(
  "#f3c300", "#875692", "#f38400",
  "#a1caf1", "#be0032", "#c2b280",
  "#848482", "#008856", "#e68fac",
  "#0067a5", "#f99379", "#604e97",
  "#f6a600", "#b3446c", "#dcd300",
  "#882d17", "#8db600", "#654522",
  "#e25822", "#2b3d26",
  "#ff69b4" # Added a new color
)


In [32]:
# Number of function levels. row_annotations no longer has a Function column
# here, so the count is taken from functions_order, the vector that was used
# as the factor levels of that column.
length(functions_order)

**Generate breaks for scale**

**Define scale palette**

In [33]:
max(unlist(heatmap_df), na.rm = TRUE)

In [34]:
min(unlist(heatmap_df), na.rm = TRUE)

In [35]:
# Clamp a single value: non-negative values are capped at maxV, negative
# values are floored at minV, and a missing value is returned unchanged.
clipValues <- function(v, minV, maxV) {
    # Missing values pass straight through
    if (is.na(v)) {
        return(v)
    }
    if (v < 0) max(v, minV) else min(v, maxV)
}

In [36]:
# COLOR PALETTE

# Manual definition
# clrP <- colorRampPalette(c("#ffffc1", "#6b001d"))(100)
# clrN <- colorRampPalette(c("#0a2258", "#f5faff"))(100)

# Predefined color palette: 100 steps of reversed "Blues" (clrN) followed by
# 100 steps of "YlOrRd" (clrP)
clrN <- colorRampPalette(rev(brewer.pal(name = "Blues", n = 9)))(100)
clrP <- colorRampPalette(brewer.pal(name = "YlOrRd", n = 9))(100)

clr <- c(clrN, clrP)

In [37]:
# VALUE RANGES (those values are passed to clipValues function)

# NO CONSTRAINTS
# max_value <- max(heatmap_df, na.rm = TRUE)
# min_value <- min(heatmap_df, na.rm = TRUE)

# CLIPPED WITH QUANTILE
# Upper limit: 99th percentile of the non-negative values.
# Lower limit: 1st percentile of the negative values.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
max_value <- quantile(heatmap_df[heatmap_df >= 0], na.rm = TRUE, probs = .99)
min_value <- quantile(heatmap_df[heatmap_df < 0], na.rm = TRUE, probs = .01)

**Define annotation palette**

In [38]:
# For each annotation, keep only the palette entries whose name occurs among
# the factor levels of the matching annotation column.

# DiseaseGroup
subset_groups <- intersect(diseaseGroups, levels(col_annotations[["diseaseGroup"]]))
disease_group_colors <- diseaseGroups_palette[subset_groups]

# Disease
subset_diseases <- intersect(diseases, levels(col_annotations[["disease"]]))
disease_colors <- disease_palette[subset_diseases]

# Celltype
subset_celltype <- intersect(celltype_names, levels(row_annotations[["CellType"]]))
cell_type_colors <- l1_palette[subset_celltype]

# Function
function_colors <- function_palette

# Create the annotation colors list (one named colour vector per annotation)
my_colour_annot <- list(
  diseaseGroup = setNames(disease_group_colors, subset_groups),
  disease = setNames(disease_colors, subset_diseases),
  #Function = setNames(function_colors, levels(row_annotations$Function))
  CellType = setNames(cell_type_colors, subset_celltype)
)

In [39]:
my_colour_annot

## Generate Heatmap

In [40]:
length(order_row)

In [41]:
length(order_col)

In [42]:
print(ncol(heatmap_matrix))
print(nrow(heatmap_matrix))

[1] 19
[1] 119


In [43]:
all(rownames(heatmap_df) %in% order_row)

In [44]:
all(colnames(heatmap_df) %in% order_col)

In [45]:
# Put rows and columns of the heatmap table in plotting order
heatmap_df <- heatmap_df[order_row, order_col]
# Align the row annotations with the heatmap rows and use FactorName as row names
ordered_row_annotations <-
  row_annotations[match(rownames(heatmap_df), row_annotations[["FactorName"]]), ] %>%
  tibble::column_to_rownames(var = "FactorName")

**Define scale palette**

In [46]:
# Limit one value to the clipping range: values >= 0 are capped at maxV,
# values < 0 are floored at minV; NA is handed back as it came in.
clipValues <- function(v, minV, maxV) {
    if (!is.na(v)) {
        v <- if (v >= 0) min(v, maxV) else max(v, minV)
    }
    v
}

In [47]:
# COLOR PALETTE

# Manual definition
# clrP <- colorRampPalette(c("#ffffc1", "#6b001d"))(100)
# clrN <- colorRampPalette(c("#0a2258", "#f5faff"))(100)

# Predefined color palette
# Negative side: the 9 "Blues" colours in reverse order, interpolated to 100
clrN <- colorRampPalette(rev(brewer.pal(name = "Blues", n = 9)))(100)
# Positive side: the 9 "YlOrRd" colours, interpolated to 100
clrP <- colorRampPalette(brewer.pal(name = "YlOrRd", n = 9))(100)

# Full scale: negative ramp first, positive ramp second
clr <- c(clrN, clrP)

In [48]:
# VALUE RANGES (those values are passed to clipValues function)
# TRUE is spelled out throughout because T is an ordinary variable that can be
# reassigned.

# NO CONSTRAINTS
max_v <- max(heatmap_df, na.rm = TRUE)
min_v <- min(heatmap_df, na.rm = TRUE)

# CLIPPED WITH QUANTILE
# Upper limit: 99th percentile of the non-negative values.
# Lower limit: 1st percentile of the negative values.
max_value <- quantile(heatmap_df[heatmap_df >= 0], na.rm = TRUE, probs = .99)
min_value <- quantile(heatmap_df[heatmap_df < 0], na.rm = TRUE, probs = .01)

**Plot heatmap**

In [49]:
# Prepare data for heatmap
# Wide table of adjusted p-values: one row per FactorName, one column per disease.
pval_long <- select(filtered_data, FactorName, disease, Pval_adj)
significance_data <- spread(pval_long, key = disease, value = Pval_adj)
head(significance_data)

Unnamed: 0_level_0,FactorName,asthma,BRCA,CD,cirrhosis,COPD,COVID,CRC,flu,HBV,HIV,HNSCC,MS,NPC,PS,PSA,RA,sepsis,SLE,UC
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,10-X-B-X-cytokine_and_receptors_proinflammatory,0.3186878,0.8711502,0.5613012,0.5652,0.296831,0.22149856,0.7911382,0.6851157,0.8843724,0.02348663,0.8062733,0.5856,0.6057356,0.0,0.03237346,0.19110229,0.59136378,0.1001825,0.0
2,100-X-T_CD8_Naive-X-adhesion_molecules,0.671043,0.4804394,0.814707,0.72208897,0.8042963,0.0,0.7361434,0.0,0.0450205,0.89719409,0.6834025,0.3765265,0.6890283,0.007038462,0.0,0.0,0.0,0.386687,0.02837209
3,101-X-T_CD8_Naive-X-antigen_presentation_molecules,0.9639208,0.4315185,0.0,0.51511111,0.4877732,0.02348663,0.5531347,0.0,0.2214986,0.13630345,0.2416506,0.9653315,0.7921286,0.0,0.0,0.0,0.007038462,0.0,0.0
4,103-X-T_CD8_NonNaive-X-CD8T_exhaustion,0.1278095,0.861526,0.6103686,0.79713377,0.2214986,0.82207279,0.3684898,0.8042963,0.5669412,0.64794767,0.8405506,0.9652066,0.9124225,0.648962428,0.29040302,0.01835097,0.770408786,0.0,0.07126978
5,104-X-T_CD8_NonNaive-X-CD8T_tcr_activation,0.8711502,0.6515431,0.4423117,0.09353333,0.104,0.93717372,0.3000745,0.9554636,0.2196,0.50692155,0.8320635,0.8274417,0.78438,0.903973671,0.3167824,0.16240514,0.929324853,0.0,0.0
6,105-X-T_CD8_NonNaive-X-chemokines,0.833137,0.8314588,0.2776877,0.65136933,0.6154709,0.91554955,0.6871939,0.8713915,0.0,0.68340249,0.6937062,0.3820175,0.3267857,0.611104072,0.39214286,0.07451497,0.91683,0.0,0.38170297


In [50]:
# Rebuild every FactorName as "<2nd piece>-X-<3rd piece>", i.e. drop the
# leading piece in front of the first "-X-".
significance_data$FactorName <- sapply(
  strsplit(significance_data$FactorName, "-X-"),
  function(pieces) {
    cell_type <- pieces[2]
    signature <- pieces[3]
    tail_of_signature <- substr(signature, 2, nchar(signature))

    # A stray leading "c" is removed only when what remains is a known signature
    if (startsWith(signature, "c") && tail_of_signature %in% signatures) {
      signature <- tail_of_signature
    }

    paste0(cell_type, "-X-", signature)
  }
)
# Use the shortened names as row names
significance_data <- tibble::column_to_rownames(significance_data, var = "FactorName")
head(significance_data)

Unnamed: 0_level_0,asthma,BRCA,CD,cirrhosis,COPD,COVID,CRC,flu,HBV,HIV,HNSCC,MS,NPC,PS,PSA,RA,sepsis,SLE,UC
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
B-X-cytokine_and_receptors_proinflammatory,0.3186878,0.8711502,0.5613012,0.5652,0.296831,0.22149856,0.7911382,0.6851157,0.8843724,0.02348663,0.8062733,0.5856,0.6057356,0.0,0.03237346,0.19110229,0.59136378,0.1001825,0.0
T_CD8_Naive-X-adhesion_molecules,0.671043,0.4804394,0.814707,0.72208897,0.8042963,0.0,0.7361434,0.0,0.0450205,0.89719409,0.6834025,0.3765265,0.6890283,0.007038462,0.0,0.0,0.0,0.386687,0.02837209
T_CD8_Naive-X-antigen_presentation_molecules,0.9639208,0.4315185,0.0,0.51511111,0.4877732,0.02348663,0.5531347,0.0,0.2214986,0.13630345,0.2416506,0.9653315,0.7921286,0.0,0.0,0.0,0.007038462,0.0,0.0
T_CD8_NonNaive-X-CD8T_exhaustion,0.1278095,0.861526,0.6103686,0.79713377,0.2214986,0.82207279,0.3684898,0.8042963,0.5669412,0.64794767,0.8405506,0.9652066,0.9124225,0.648962428,0.29040302,0.01835097,0.770408786,0.0,0.07126978
T_CD8_NonNaive-X-CD8T_tcr_activation,0.8711502,0.6515431,0.4423117,0.09353333,0.104,0.93717372,0.3000745,0.9554636,0.2196,0.50692155,0.8320635,0.8274417,0.78438,0.903973671,0.3167824,0.16240514,0.929324853,0.0,0.0
T_CD8_NonNaive-X-chemokines,0.833137,0.8314588,0.2776877,0.65136933,0.6154709,0.91554955,0.6871939,0.8713915,0.0,0.68340249,0.6937062,0.3820175,0.3267857,0.611104072,0.39214286,0.07451497,0.91683,0.0,0.38170297


In [51]:
# Same row and column order as the heatmap
significance_data <- significance_data[order_row, order_col]

In [52]:
# Cell labels for the heatmap: "·" where the adjusted p-value is below the
# threshold, an empty string otherwise (including missing p-values).
# The threshold is read from filter_by_pval (set to 0.05 in the Arguments
# section) instead of being repeated as a literal.
sig_to_plot <- significance_data %>%
  mutate(across(everything(), ~ ifelse(!is.na(.) & . < filter_by_pval, "·", "")))
head(sig_to_plot)

Unnamed: 0_level_0,RA,PS,PSA,CD,UC,SLE,MS,asthma,COPD,cirrhosis,sepsis,HIV,HBV,COVID,flu,BRCA,NPC,HNSCC,CRC
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
B-X-adhesion_molecules,·,·,·,,,,,,,,,,,,,,,,
Plasma-X-adhesion_molecules,,,,,,,,,,,,,,,,,,,
pDC-X-adhesion_molecules,,,,,,,,,,,,,,,,,,,
DC-X-adhesion_molecules,·,·,·,,,,,,,,,,,,,,,,
Mono-X-adhesion_molecules,·,·,·,,,,,,,,·,,·,·,·,,,,
T_CD4_Naive-X-adhesion_molecules,·,·,·,,·,,,,,,·,,·,·,·,,,,


In [59]:
#options(repr.plot.width = 10, repr.plot.height = 20, repr.plot.res = 100)
# Colour breaks. pheatmap expects one more break than colours, so the 200
# colours in clr need 201 breaks: 100 from min_value up to (but excluding) 0
# for the negative ramp, then 101 from 0 to max_value for the positive ramp.
breaks <- c(head(seq(min_value, 0, length.out = 101), -1), seq(0, max_value, length.out = 101))
#figure_path <-  paste0(base_path,"/03_downstream_analysis/06_inflammation_signatures/results/heatmap/heatmap_not_normalized_UnCorrected_noFilt_celltypeOrder.pdf")
figure_path <-  paste0(base_path,"/03_downstream_analysis/06_inflammation_signatures/results/heatmap/heatmap_not_normalized_UnCorrected_noFilt_functionOrder.pdf")

# pheatmap() writes the PDF itself through `filename` (with `width`/`height`
# in inches) and closes that device when it is done. It is therefore not
# wrapped in a separate pdf()/dev.off() pair: that would open a second device
# on the same file and the two would overwrite each other's output.
a <- pheatmap(
  # Values outside [min_value, max_value] are clipped before plotting
  mat = apply(heatmap_df, c(1, 2), FUN = function(x) {clipValues(x, min_value, max_value)}),
  border_color = FALSE,
  color = clr,
  breaks = breaks,
  display_numbers = sig_to_plot,
  fontsize_number = 10,
  na_col = "white",
  # Rows and columns keep the order they already have
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  cellheight = 5,
  cellwidth = 7,
  cex = 1,
  fontsize = 6,
  annotation_colors = my_colour_annot,
  annotation_col = col_annotations,
  annotation_row = ordered_row_annotations,
  #gaps_col = cumsum(table(col_annotations$diseaseGroup)),
  #gaps_row = cumsum(table(row_annotations$Function)),
  # Gap positions are hard-coded and must be updated by hand whenever the row
  # or column order changes.
  gaps_col = c(7, 10, 11, 15, 19),
  gaps_row = c(11, 22, 33, 44, 55, 66, 77, 88, 99, 101, 103, 105, 107, 109, 111, 113, 114, 115, 116, 117), # GAP ROW FOR FUNCTIONS
  #gaps_row = c(10, 19, 28, 38, 49, 63, 77, 88, 99, 108), # GAP ROW FOR CELLTYPES w/ global
  #gaps_row = c(10, 19, 28, 39, 51, 65, 79, 90, 101, 110), # GAP ROW FOR CELLTYPES
  filename = figure_path,
  legend = TRUE,
  legend_breaks = c(max_value, -4, -2, -1, 0, 1, 2, 4, 6, min_value),
  #legend_labels = names(my_colors),
  height = 30,
  width = 15
)