In [1]:
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(data.table))
suppressPackageStartupMessages(require(RColorBrewer))
suppressPackageStartupMessages(library(pheatmap))
suppressPackageStartupMessages(library(tidyr))
suppressPackageStartupMessages(library(purrr))
suppressPackageStartupMessages(library(circlize))

# Parameters

In [2]:
# Input locations -------------------------
# Every input file lives below this project root.
base_path <- "/scratch_isilon/groups/singlecell/shared/projects/Inflammation-PBMCs-Atlas"

# Filtered results table
corrected_logFC_inputpath <- paste0(
  base_path,
  "/03_downstream_analysis/06_inflammation_signatures/results/DecoupleR_ulmestimates_mixedmlEvalFilt_L2_Corr.csv"
)
# Non-filtered results table
corrected_logFC_NonFilt_inputpath <- paste0(
  base_path,
  "/03_downstream_analysis/06_inflammation_signatures/results/DecoupleR_ulmestimates_mixedmlEval_L2_Corr.csv"
)
# Disease -> disease group lookup table
disease_metadata_inputpath <- paste0(
  base_path,
  "/03_downstream_analysis/06_inflammation_signatures/results/disease_metadata.csv"
)

# Arguments -------------------------
# Significance threshold
filter_by_pval <- 0.05

# Figure geometry and text size
width <- 10
height <- 15
resol <- 400
cellheight <- 15
cellwidth <- 20
fontsize <- 12

## Load

In [3]:
# Load -------------------------
# Disease metadata; the first CSV column is used as row names.
metadata <- read.csv(
  file = disease_metadata_inputpath,
  header = TRUE,
  row.names = 1
)
# Non-filtered results table.
correctedNoFilt_logFC <- read.csv(
  file = corrected_logFC_NonFilt_inputpath,
  header = TRUE
)

In [4]:
# Preview the first six rows of the loaded results table.
print(head(correctedNoFilt_logFC, n = 6L))

                                                    X  Coef. Std.Err.      z
1  C(disease, Treatment(reference="healthy"))[T.BRCA]  0.616    0.385  1.601
2 C(disease, Treatment(reference="healthy"))[T.COVID] -0.225    0.230 -0.978
3   C(disease, Treatment(reference="healthy"))[T.CRC]  0.716    0.273  2.619
4   C(disease, Treatment(reference="healthy"))[T.HBV] -0.291    0.314 -0.927
5   C(disease, Treatment(reference="healthy"))[T.NPC] -0.616    0.329 -1.870
6    C(disease, Treatment(reference="healthy"))[T.UC] -1.610    0.798 -2.016
  P..z. X.0.025 X0.975. AnnotationLevel CellType_Level1 CellType_Level2
1 0.109  -0.138   1.370          Level2               B  B_IFNresponder
2 0.328  -0.677   0.226          Level2               B  B_IFNresponder
3 0.009   0.180   1.251          Level2               B  B_IFNresponder
4 0.354  -0.907   0.324          Level2               B  B_IFNresponder
5 0.061  -1.262   0.029          Level2               B  B_IFNresponder
6 0.044  -3.175  -0.045      

In [5]:
# Distinct disease labels present in the results table.
unique(correctedNoFilt_logFC[["disease"]])

In [6]:
# Number of distinct disease labels.
length(unique(correctedNoFilt_logFC[["disease"]]))

In [7]:
# Number of distinct FactorName values.
length(unique(correctedNoFilt_logFC[["FactorName"]]))

In [8]:
# Optional (disabled): drop the BRCA row from the metadata table.
#metadata <- metadata %>% filter(disease != "BRCA")
# Show the full disease / diseaseGroup lookup table.
print(metadata)

     disease         diseaseGroup
0       BRCA          solid_tumor
1         CD                IMIDs
2       COPD chronic_inflammation
3      COVID            infection
4        CRC          solid_tumor
5        HBV            infection
6        HIV            infection
7      HNSCC          solid_tumor
8         MS                IMIDs
9        NPC          solid_tumor
10        PS                IMIDs
11       PSA                IMIDs
12        RA                IMIDs
13       SLE                IMIDs
14        UC                IMIDs
15    asthma chronic_inflammation
16 cirrhosis chronic_inflammation
17       flu            infection
18   healthy              healthy
19    sepsis   acute_inflammation


In [9]:
# Empty placeholder (c() with no arguments evaluates to NULL).
signatures <- NULL

## Preprocessing

**Extract information from input DF**

In [10]:
# Preprocess data
# Keep only rows whose Level1 cell type is T_CD8_NonNaive, then restrict to
# the six diseases shown in this heatmap.
filtered_data <- subset(correctedNoFilt_logFC, CellType_Level1 == "T_CD8_NonNaive")
filtered_data <- subset(filtered_data, disease %in% c("RA", "PSA", "PS", "UC", "CD", "SLE"))
#filtered_data <- subset(filtered_data, disease == "IFN_Type_1_2_Lambda")

# FactorName is "-X-"-delimited (e.g. "109-X-T_CD8_NonNaive-X-IFN_Type_1_2_Lambda");
# the third piece is the factor function. vapply() always yields a character
# vector (NA when the piece is missing), even when no rows remain, whereas
# sapply() would return an empty list in that case.
filtered_data <- filtered_data %>%
  mutate(
    Function = vapply(
      strsplit(as.character(FactorName), "-X-"),
      function(parts) parts[3],
      character(1)
    )
  )
# Keep only the IFN_Type_1_2_Lambda factor.
filtered_data <- subset(filtered_data, Function == "IFN_Type_1_2_Lambda")
head(filtered_data)

Unnamed: 0_level_0,X,Coef.,Std.Err.,z,P..z.,X.0.025,X0.975.,AnnotationLevel,CellType_Level1,CellType_Level2,FactorName,disease,Pval_adj,Function
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>
5510,"C(disease, Treatment(reference=""healthy""))[T.CD]",0.928,0.401,2.313,0.021,0.141,1.714,Level2,T_CD8_NonNaive,T_CD8_CM,109-X-T_CD8_NonNaive-X-IFN_Type_1_2_Lambda,CD,0.094117647,IFN_Type_1_2_Lambda
5519,"C(disease, Treatment(reference=""healthy""))[T.PS]",0.308,0.382,0.806,0.42,-0.441,1.056,Level2,T_CD8_NonNaive,T_CD8_CM,109-X-T_CD8_NonNaive-X-IFN_Type_1_2_Lambda,PS,0.699689119,IFN_Type_1_2_Lambda
5520,"C(disease, Treatment(reference=""healthy""))[T.PSA]",0.772,0.382,2.021,0.043,0.023,1.52,Level2,T_CD8_NonNaive,T_CD8_CM,109-X-T_CD8_NonNaive-X-IFN_Type_1_2_Lambda,PSA,0.161502347,IFN_Type_1_2_Lambda
5521,"C(disease, Treatment(reference=""healthy""))[T.RA]",1.215,0.355,3.428,0.001,0.52,1.91,Level2,T_CD8_NonNaive,T_CD8_CM,109-X-T_CD8_NonNaive-X-IFN_Type_1_2_Lambda,RA,0.007407407,IFN_Type_1_2_Lambda
5522,"C(disease, Treatment(reference=""healthy""))[T.SLE]",0.883,0.18,4.895,0.0,0.529,1.237,Level2,T_CD8_NonNaive,T_CD8_CM,109-X-T_CD8_NonNaive-X-IFN_Type_1_2_Lambda,SLE,0.0,IFN_Type_1_2_Lambda
5523,"C(disease, Treatment(reference=""healthy""))[T.UC]",1.263,0.397,3.184,0.001,0.486,2.041,Level2,T_CD8_NonNaive,T_CD8_CM,109-X-T_CD8_NonNaive-X-IFN_Type_1_2_Lambda,UC,0.007407407,IFN_Type_1_2_Lambda


In [11]:
# Reshape to wide form: one row per Level2 cell type, one column per disease,
# cells holding the coefficient (Coef.).
heatmap_data <- spread(
  select(filtered_data, CellType_Level2, disease, Coef.),
  key = disease,
  value = Coef.
)
head(heatmap_data)

Unnamed: 0_level_0,CellType_Level2,CD,PS,PSA,RA,SLE,UC
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,T_CD8_activated,1.666,0.867,1.637,1.903,1.7,2.573
2,T_CD8_CM,0.928,0.308,0.772,1.215,0.883,1.263
3,T_CD8_CM_stem,1.954,1.777,2.195,2.698,3.281,2.359
4,T_CD8_eff_HOBIT,1.417,0.959,1.093,2.096,1.746,2.434
5,T_CD8_EM_CX3CR1high,1.803,0.975,1.783,2.62,0.728,2.013
6,T_CD8_EM_CX3CR1int,2.147,2.111,1.565,2.295,1.749,2.012


**Generate heatmap DF**

In [12]:
# Numeric matrix of coefficients: drop the label column and use it as row names.
heatmap_matrix <- as.matrix(heatmap_data[, -1])
rownames(heatmap_matrix) <- heatmap_data[["CellType_Level2"]]
heatmap_matrix

Unnamed: 0,CD,PS,PSA,RA,SLE,UC
T_CD8_activated,1.666,0.867,1.637,1.903,1.7,2.573
T_CD8_CM,0.928,0.308,0.772,1.215,0.883,1.263
T_CD8_CM_stem,1.954,1.777,2.195,2.698,3.281,2.359
T_CD8_eff_HOBIT,1.417,0.959,1.093,2.096,1.746,2.434
T_CD8_EM_CX3CR1high,1.803,0.975,1.783,2.62,0.728,2.013
T_CD8_EM_CX3CR1int,2.147,2.111,1.565,2.295,1.749,2.012
T_CD8_IFNresponse,2.223,1.045,1.71,1.996,0.762,1.944
T_CD8_Mem_cytotoxic,2.429,2.144,1.95,2.894,2.601,3.54


In [13]:
# Data-frame version of the coefficient matrix, with cell types as row names
# and diseases as column names.
heatmap_df <- as.data.frame(heatmap_matrix)
rownames(heatmap_df) <- heatmap_data$CellType_Level2
# Drop the first (label) column name with a negative index; unlike
# 2:length(...), this cannot count backwards when only one column exists.
colnames(heatmap_df) <- colnames(heatmap_data)[-1]
heatmap_df

Unnamed: 0_level_0,CD,PS,PSA,RA,SLE,UC
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
T_CD8_activated,1.666,0.867,1.637,1.903,1.7,2.573
T_CD8_CM,0.928,0.308,0.772,1.215,0.883,1.263
T_CD8_CM_stem,1.954,1.777,2.195,2.698,3.281,2.359
T_CD8_eff_HOBIT,1.417,0.959,1.093,2.096,1.746,2.434
T_CD8_EM_CX3CR1high,1.803,0.975,1.783,2.62,0.728,2.013
T_CD8_EM_CX3CR1int,2.147,2.111,1.565,2.295,1.749,2.012
T_CD8_IFNresponse,2.223,1.045,1.71,1.996,0.762,1.944
T_CD8_Mem_cytotoxic,2.429,2.144,1.95,2.894,2.601,3.54


**Disease order**

In [14]:
# Left-to-right order of the disease columns in the heatmap.
disease_order <- c("RA", "PS", "PSA", "CD", "UC", "SLE")

**Celltype order**

In [15]:
# Top-to-bottom order of the cell-type rows in the heatmap.
heatmap_celltypes_order <- c(
  "T_CD8_CM",
  "T_CD8_CM_stem",
  "T_CD8_EM_CX3CR1high",
  "T_CD8_EM_CX3CR1int",
  "T_CD8_eff_HOBIT",
  "T_CD8_IFNresponse",
  "T_CD8_Mem_cytotoxic",
  "T_CD8_activated"
) # , "T_CD8_arrested"

## Color Annotations

In [16]:
# Column (disease) annotations for the heatmap
# diseaseGroup becomes a factor before subsetting.
col_annotations <- mutate(metadata, diseaseGroup = as.factor(diseaseGroup))
# Keep only the plotted diseases, in plotting order.
col_annotations <- col_annotations[match(disease_order, col_annotations[["disease"]]), ]
# Reset row names so that column_to_rownames() can set new ones.
rownames(col_annotations) <- NULL
order_col <- col_annotations[["disease"]]
# Copy the disease label into a "Disease" column, then move "disease" to the row names.
col_annotations <- col_annotations %>%
  mutate(Disease = disease) %>%
  tibble::column_to_rownames("disease")
col_annotations

Unnamed: 0_level_0,diseaseGroup,Disease
Unnamed: 0_level_1,<fct>,<chr>
RA,IMIDs,RA
PS,IMIDs,PS
PSA,IMIDs,PSA
CD,IMIDs,CD
UC,IMIDs,UC
SLE,IMIDs,SLE


In [17]:
# Row (cell-type) annotations: one row per distinct Level2 cell type, with the
# label both as row name and as the "CellType" column.
row_annotations <- distinct(select(filtered_data, CellType_Level2))
row_annotations <- row_annotations %>%
  mutate(CellType = CellType_Level2) %>%
  tibble::column_to_rownames("CellType_Level2")
row_annotations

Unnamed: 0_level_0,CellType
Unnamed: 0_level_1,<chr>
T_CD8_CM,T_CD8_CM
T_CD8_CM_stem,T_CD8_CM_stem
T_CD8_EM_CX3CR1high,T_CD8_EM_CX3CR1high
T_CD8_EM_CX3CR1int,T_CD8_EM_CX3CR1int
T_CD8_IFNresponse,T_CD8_IFNresponse
T_CD8_Mem_cytotoxic,T_CD8_Mem_cytotoxic
T_CD8_activated,T_CD8_activated
T_CD8_eff_HOBIT,T_CD8_eff_HOBIT


### Define Palettes

**Celltypes**

In [18]:
# Level2 cell-type label -> hex colour.
annotation_Level2_palette <- list(
  "T_CD8_Naive" = "#0FFFFF",
  "T_CD8_CM" = "#29AB87",
  "T_CD8_CM_stem" = "#20B2AA",
  "T_CD8_EM_CX3CR1high" = "#99FFFF",
  "T_CD8_EM_CX3CR1int" = "#81D8D0",
  "T_CD8_eff_HOBIT" = "#007A74",
  "T_CD8_IFNresponse" = "#3EB489",
  "T_CD8_Mem_cytotoxic" = "#37a17b",
  "T_CD8_activated" = "#5ec7a1",
  "T_CD8_arrested" = "#5ec76d"
)
# Flattened named character vector of the same palette, plus its labels.
l2_palette <- unlist(annotation_Level2_palette)
celltype_names <- names(annotation_Level2_palette)

**Diseases**

In [19]:
# Disease label -> hex colour.
diseases_palette <- list(
  "RA" = "#264653",
  "PS" = "#287271",
  "PSA" = "#2a9d8f",
  "CD" = "#e76f51",
  "UC" = "#e9c46a",
  "SLE" = "#941c2f"
)

# Flattened named character vector of the same palette, plus its labels.
disease_palette <- unlist(diseases_palette)
diseases <- names(diseases_palette)

**Generate breaks for scale**

**Define scale palette**

In [20]:
# Clip values to a range: non-negative values are capped at maxV, negative
# values are floored at minV, and NA entries are returned unchanged.
#
# v     Numeric scalar or vector (may contain NA, may be empty).
# minV  Lower bound applied to negative values.
# maxV  Upper bound applied to non-negative values.
#
# Returns an object of the same length as v. Works element-wise, so it can be
# called on a whole vector as well as on a single value.
clipValues <- function(v, minV, maxV) {
    clipped <- v
    known <- !is.na(v)
    is_pos <- known & v >= 0
    is_neg <- known & v < 0
    # Guard the assignments so an all-NA or empty input keeps its original type.
    if (any(is_pos)) {
        clipped[is_pos] <- pmin(v[is_pos], maxV)
    }
    if (any(is_neg)) {
        clipped[is_neg] <- pmax(v[is_neg], minV)
    }
    clipped
}

In [21]:
# COLOUR SCALE

# Hand-picked alternative (disabled)
# clrP <- colorRampPalette(c("#ffffc1", "#6b001d"))(100)
# clrN <- colorRampPalette(c("#0a2258", "#f5faff"))(100)

# 100 interpolated steps each, built from RColorBrewer palettes:
# "Blues" reversed for the first half, "YlOrRd" for the second half.
clrN <- colorRampPalette(rev(brewer.pal(9, "Blues")))(100)
clrP <- colorRampPalette(brewer.pal(9, "YlOrRd"))(100)

# Combined 200-colour scale: clrN followed by clrP.
clr <- c(clrN, clrP)

In [22]:
# VALUE RANGES (these bounds are meant for the clipValues function)

# NO CONSTRAINTS
# max_value = max(heatmap_df, na.rm = T)
# min_value = min(heatmap_df, na.rm = T)

# CLIPPED WITH QUANTILES
# Upper bound: 99th percentile of the non-negative entries.
max_value <- quantile(heatmap_df[heatmap_df >= 0], probs = 0.99, na.rm = TRUE)
# Lower bound: 1st percentile of the negative entries.
min_value <- quantile(heatmap_df[heatmap_df < 0], probs = 0.01, na.rm = TRUE)

**Define annotation palette**

In [23]:
# Disease: keep only palette entries for diseases present in the column annotations.
subset_diseases <- intersect(diseases, col_annotations$Disease)
disease_colors <- disease_palette[subset_diseases]
# Celltype: keep only palette entries for cell types present in the row annotations.
subset_celltype <- intersect(celltype_names, row_annotations$CellType)
cell_type_colors <- l2_palette[subset_celltype]

# Create the annotation colors list.
# The list names must match the annotation column names exactly
# ("Disease" in col_annotations, "CellType" in row_annotations); a lower-case
# "disease" key matches no annotation column, so the disease palette would be
# ignored in favour of auto-generated colours.
my_colour_annot <- list(
  Disease = setNames(disease_colors, subset_diseases),
  CellType = setNames(cell_type_colors, subset_celltype)
)

In [24]:
# Display the annotation colour list that is passed to pheatmap().
my_colour_annot

## Generate Heatmap

In [25]:
# Number of annotated cell types (heatmap rows).
length(row_annotations[["CellType"]])

In [26]:
# Number of annotated diseases (heatmap columns).
length(col_annotations[["Disease"]])

In [27]:
# Matrix dimensions: column count first, then row count.
print(dim(heatmap_matrix)[2L])
print(dim(heatmap_matrix)[1L])

[1] 6
[1] 8


In [28]:
# Sanity check: the row names and the requested row order must contain exactly
# the same labels. Checking both directions matters because indexing by a label
# that is missing from heatmap_df would silently add an all-NA row.
setequal(rownames(heatmap_df), heatmap_celltypes_order)

In [29]:
# Sanity check: the column names and the requested disease order must contain
# exactly the same labels, so that reordering neither drops a column nor asks
# for one that does not exist.
setequal(colnames(heatmap_df), disease_order)

In [30]:
# Display the requested row (cell-type) order.
heatmap_celltypes_order

In [31]:
# Put rows and columns of the coefficient table into plotting order.
heatmap_df <- heatmap_df[heatmap_celltypes_order, disease_order]
# Sort the row annotations the same way: CellType becomes a factor whose level
# order is the plotting order, then rows are arranged by it.
ordered_row_annotations <- arrange(
  mutate(row_annotations, CellType = factor(CellType, levels = heatmap_celltypes_order)),
  CellType
)

In [32]:
# Display the row annotations after sorting into plotting order.
ordered_row_annotations

Unnamed: 0_level_0,CellType
Unnamed: 0_level_1,<fct>
T_CD8_CM,T_CD8_CM
T_CD8_CM_stem,T_CD8_CM_stem
T_CD8_EM_CX3CR1high,T_CD8_EM_CX3CR1high
T_CD8_EM_CX3CR1int,T_CD8_EM_CX3CR1int
T_CD8_eff_HOBIT,T_CD8_eff_HOBIT
T_CD8_IFNresponse,T_CD8_IFNresponse
T_CD8_Mem_cytotoxic,T_CD8_Mem_cytotoxic
T_CD8_activated,T_CD8_activated


**Define scale palette**

In [33]:
# Clip values to a range: non-negative values are capped at maxV, negative
# values are floored at minV, and NA entries are returned unchanged.
#
# v     Numeric scalar or vector (may contain NA, may be empty).
# minV  Lower bound applied to negative values.
# maxV  Upper bound applied to non-negative values.
#
# Returns an object of the same length as v. Works element-wise, so it can be
# called on a whole vector as well as on a single value.
clipValues <- function(v, minV, maxV) {
    clipped <- v
    known <- !is.na(v)
    is_pos <- known & v >= 0
    is_neg <- known & v < 0
    # Guard the assignments so an all-NA or empty input keeps its original type.
    if (any(is_pos)) {
        clipped[is_pos] <- pmin(v[is_pos], maxV)
    }
    if (any(is_neg)) {
        clipped[is_neg] <- pmax(v[is_neg], minV)
    }
    clipped
}

In [34]:
# COLOUR SCALE

# Hand-picked alternative (disabled)
# clrP <- colorRampPalette(c("#ffffc1", "#6b001d"))(100)
# clrN <- colorRampPalette(c("#0a2258", "#f5faff"))(100)

# 100 interpolated steps each, built from RColorBrewer palettes:
# "Blues" reversed for the first half, "YlOrRd" for the second half.
clrN <- colorRampPalette(rev(brewer.pal(9, "Blues")))(100)
clrP <- colorRampPalette(brewer.pal(9, "YlOrRd"))(100)

# Combined 200-colour scale: clrN followed by clrP.
clr <- c(clrN, clrP)

In [35]:
# VALUE RANGES (these bounds are meant for the clipValues function)

# NO CONSTRAINTS: overall extremes of the coefficient table.
max_v <- max(heatmap_df, na.rm = TRUE)
min_v <- min(heatmap_df, na.rm = TRUE)

# CLIPPED WITH QUANTILES
# Upper bound: 99th percentile of the non-negative entries.
max_value <- quantile(heatmap_df[heatmap_df >= 0], probs = 0.99, na.rm = TRUE)
# Lower bound: 1st percentile of the negative entries.
min_value <- quantile(heatmap_df[heatmap_df < 0], probs = 0.01, na.rm = TRUE)

**Plot heatmap**

In [36]:
# Prepare significance data for the heatmap
# Wide table of adjusted p-values: one row per Level2 cell type, one column per
# disease. Only significance_data is assigned here, so the coefficient table
# heatmap_data is no longer overwritten with p-values.
significance_data <- filtered_data %>%
  select(CellType_Level2, disease, Pval_adj) %>%
  spread(key = disease, value = Pval_adj)
head(significance_data)

Unnamed: 0_level_0,CellType_Level2,CD,PS,PSA,RA,SLE,UC
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,T_CD8_activated,0.158490566,0.581818182,0.31428571,0.072727273,0.0,0.007407407
2,T_CD8_CM,0.094117647,0.699689119,0.16150235,0.007407407,0.0,0.007407407
3,T_CD8_CM_stem,0.007407407,0.007407407,0.0,0.0,0.0,0.0
4,T_CD8_eff_HOBIT,0.431432974,0.655679287,0.55930087,0.094117647,0.0,0.076876877
5,T_CD8_EM_CX3CR1high,0.035555556,0.364489112,0.03030303,0.0,0.06173633,0.013559322
6,T_CD8_EM_CX3CR1int,0.058471761,0.134320988,0.19642857,0.013559322,0.0,0.072727273


In [37]:
# Move the cell-type labels from a column into the row names.
significance_data <- tibble::column_to_rownames(significance_data, var = "CellType_Level2")
head(significance_data)

Unnamed: 0_level_0,CD,PS,PSA,RA,SLE,UC
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
T_CD8_activated,0.158490566,0.581818182,0.31428571,0.072727273,0.0,0.007407407
T_CD8_CM,0.094117647,0.699689119,0.16150235,0.007407407,0.0,0.007407407
T_CD8_CM_stem,0.007407407,0.007407407,0.0,0.0,0.0,0.0
T_CD8_eff_HOBIT,0.431432974,0.655679287,0.55930087,0.094117647,0.0,0.076876877
T_CD8_EM_CX3CR1high,0.035555556,0.364489112,0.03030303,0.0,0.06173633,0.013559322
T_CD8_EM_CX3CR1int,0.058471761,0.134320988,0.19642857,0.013559322,0.0,0.072727273


In [38]:
# Put the p-value table into the same row/column order as the heatmap.
significance_data <- significance_data[heatmap_celltypes_order, disease_order]

In [39]:
# Cell labels for the heatmap: a dot where the adjusted p-value is below 0.05,
# an empty string otherwise (including missing p-values).
sig_to_plot <- mutate(
  significance_data,
  across(everything(), ~ ifelse(!is.na(.) & . < 0.05, "·", ""))
)
head(sig_to_plot)

Unnamed: 0_level_0,RA,PS,PSA,CD,UC,SLE
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
T_CD8_CM,·,,,,·,·
T_CD8_CM_stem,·,·,·,·,·,·
T_CD8_EM_CX3CR1high,·,,·,·,·,
T_CD8_EM_CX3CR1int,·,,,,,·
T_CD8_eff_HOBIT,,,,,,·
T_CD8_IFNresponse,·,,·,·,·,


In [40]:
#options(repr.plot.width = 10, repr.plot.height = 20, repr.plot.res = 100)
#breaks <- c(head(seq(min_value,0, length.out = 101), -1),seq(0, max_value, length.out = 100))
figure_path <-  paste0(base_path,"/03_downstream_analysis/06_inflammation_signatures/results/heatmap/heatmap_Corr_L2_TCD8NonNaive_IFN12L.pdf")
# Let pheatmap() write the PDF itself through `filename`, `width` and `height`.
# The earlier explicit pdf(figure_path) / dev.off() pair is removed: it opened a
# second PDF device on the same file, with a different page size, while
# pheatmap() was also writing to it.
a <- pheatmap(mat = heatmap_df,
              #apply(heatmap_df, c(1,2)),FUN = function(x) {clipValues(x, min_value, max_value)}
  border_color = FALSE,
  color = colorRampPalette(brewer.pal(n = 9, name = "YlOrRd"))(100),
  #breaks  =  breaks,
  # Dots mark cells whose adjusted p-value is below the threshold.
  display_numbers = sig_to_plot,
  fontsize_number = 10,
  na_col = "white",
  # Keep the manually defined row/column order.
  cluster_cols = FALSE,
  cluster_rows = FALSE,
  cellheight = 5,
  cellwidth = 7,
  cex = 1, 
  fontsize = 6,
  annotation_colors = my_colour_annot,
  annotation_col = col_annotations,
  annotation_row = ordered_row_annotations,
  #gaps_col = cumsum(table(col_annotations$diseaseGroup)),
  #gaps_row = cumsum(table(row_annotations$Function)),
  #gaps_col = c(7, 10, 11, 15, 19),
  #gaps_row = c(11, 22, 33, 44, 55, 66, 77, 88, 99, 101, 103, 105, 107, 109, 111, 113, 114, 115, 116, 117), # GAP ROW FOR FUNCTIONS
  #gaps_row = c(10, 19, 28, 38, 49, 63, 77, 88, 99, 108), # GAP ROW FOR CELLTYPES w/ global
  #gaps_row = c(10, 19, 28, 39, 51, 65, 79, 90, 101, 110), # GAP ROW FOR CELLTYPES
  filename = figure_path,
  #legend = TRUE,
  #legend_breaks = c(max_value, -4, -2, -1, 0, 1, 2, 4, 6, min_value),
  #legend_labels = names(my_colors),
  height = 18, 
  width = 15
)
# The file is already written and closed at this point; print() only draws the
# heatmap on the currently active graphics device (inline in a notebook).
print(a)

In [41]:
# Display the lower clipping bound computed from the negative entries of heatmap_df.
min_value