In [None]:
library(readxl)
library(pheatmap)
library(edgeR)
library(dplyr)

In [None]:
# Load the "Area Normalized" sheet of the workbook. The tibble returned by
# read_excel() is passed through data.frame() (not as.data.frame()) on purpose:
# data.frame() sanitises column names, and the rest of the notebook refers to
# columns by their sanitised form (e.g. `Segment..Name..Label.`).
Nanostring_Proteins <- data.frame(
  read_excel(
    path = "All Data Human IO Protein.xlsx",
    sheet = "Area Normalized"
  )
)

In [None]:
# ---- Row labels -------------------------------------------------------------
# Sort the ROIs by segment label; the positional relabelling below is applied
# to this sorted order.
Nanostring_Proteins <- Nanostring_Proteins[order(Nanostring_Proteins$Segment..Name..Label.), ]

# Default label for every ROI is "<Scan_ID>_<ROI_ID>".
Nanostring_Proteins$Row_Annot <- paste0(Nanostring_Proteins$Scan_ID, "_", Nanostring_Proteins$ROI_ID)

# Rows 7:12 and 19:24 are overwritten with the same six hand-written labels.
# NOTE(review): these indices are positional and depend on the current row
# layout of the sheet -- confirm them whenever the input file changes.
roi_labels <- c("SCC R01_1", "SCC R01_2", "SCC R01_3", "SCC P04_4", "SCC P04_5", "SCC R01_6")
Nanostring_Proteins$Row_Annot[7:12] <- roi_labels
Nanostring_Proteins$Row_Annot[19:24] <- roi_labels

# ---- Annotation table and numeric matrix --------------------------------------
row_annotations <- Nanostring_Proteins[, c("Row_Annot", "Segment..Name..Label.")]

# One id per ROI ("<label>_<segment>"), shared by the annotation table and the
# numeric table. Previously the numeric table rebuilt its ids by stripping the
# first three characters of the label and prepending a literal "SCC"; that only
# agreed with the annotation ids when every label started with "SCC" and was at
# most 15 characters long, and any disagreement silently produced NA rows in
# the match() below.
sample_ids <- paste0(row_annotations$Row_Annot, "_", row_annotations$Segment..Name..Label.)
rownames(row_annotations) <- sample_ids

# Shorten the label shown in the annotation track: keep characters 4 to 15.
row_annotations$Row_Annot <- substr(row_annotations$Row_Annot, 4, 15)

# Drop columns 1-9 and 58 by position, leaving the measurement columns.
# NOTE(review): presumably 1-9 are metadata and 58 is the Row_Annot column
# added above -- confirm against the sheet layout.
numerical_Data <- Nanostring_Proteins[, -c(1:9, 58)]
rownames(numerical_Data) <- sample_ids

# ---- Ordering -----------------------------------------------------------------
# Order by segment, then by label in descending order, and bring the numeric
# table into that same order via its row names.
row_annotations_sorted <- row_annotations %>%
  arrange(Segment..Name..Label., desc(Row_Annot))
row_order <- match(rownames(row_annotations_sorted), rownames(numerical_Data))
if (anyNA(row_order)) {
  # Fail loudly instead of carrying all-NA rows into the normalisation.
  stop(
    "Row names of the sorted annotations do not all match numerical_Data.",
    call. = FALSE
  )
}
numerical_Data_sorted <- numerical_Data[row_order, ]

# Transposed so that each column is one ROI and each row one measured column.
expression_data <- t(numerical_Data_sorted)

# ================================================== Standard Library size normalisation ===========================================
# Step 1: Calculate library sizes (column sums)
library_sizes <- colSums(expression_data)

# Step 2: Normalize by library sizes (Counts Per Million - CPM)
normalized_data <- sweep(expression_data, 2, library_sizes, FUN = "/") * 1e6
# Transpose back so that ROIs are rows again.
normalized_data <- t(normalized_data)

# ================================================== ROI Library size normalisation ===========================================
# expression_data <- numerical_Data_sorted
# expression_data$patient <- sapply(strsplit(rownames(expression_data), "_"), function(x) {paste0(x[1], "_", x[3])})
# normalized_data <- expression_data %>%
#                    group_by(patient) %>%
#                    mutate(across(where(is.numeric), # Select numeric columns
#                     ~ (.x / sum(.x, na.rm = TRUE)) * 1e6 # Normalize and scale
#                    )) %>% ungroup()

# ================================================== Number of ROI Standard Library size normalisation ===========================================
# expression_data <- numerical_Data_sorted
# expression_data$patient <- sapply(strsplit(rownames(expression_data), "_"), function(x) {paste0(x[1], "_", x[3])})
# normalized_data <- expression_data %>%
#   group_by(patient) %>%
#   mutate(across(
#     everything(), # Select all columns
#     ~ if (is.numeric(.x)) {
#       (.x / sum(.x, na.rm = TRUE)) * 1e6 / n()
#     } else {
#       .x # Non-numeric columns remain unchanged
#     },
#     .names = "{.col}" # Keep column names unchanged
#   )) %>%
#   ungroup()

# normalized_data <- as.data.frame(normalized_data)
# rownames(normalized_data) <- rownames(expression_data)
# normalized_data <- normalized_data[c(1:48)]

In [None]:
# Pick the 48 marker columns, in display order, from the library-size
# normalised table. Note that this reuses (overwrites) `numerical_Data_sorted`.
numerical_Data_sorted <- normalized_data[
  ,
  c(
    "MART1", "Ki.67", "PD.L2", "CD44", "CD80", "ER.alpha",
    "PanCk", "CTLA4", "GZMB", "PTEN", "NY.ESO.1", "EpCAM",
    "CD127", "CD25", "S6", "Ms.IgG2a", "PD.1", "Her2",
    "GAPDH", "Ms.IgG1", "Histone.H3", "Rb.IgG", "CD56", "CD27",
    "CD66b", "CD163", "Fibronectin", "FAP.alpha", "CD14", "CD68",
    "CD34", "SMA", "PR", "S100B", "CD20", "CD8",
    "HLA.DR", "ICOS", "Bcl.2", "CD3", "CD4", "CD45",
    "FOXP3", "CD45RO", "PD.L1", "CD40", "Beta.2.microglobulin", "CD11c"
  )
]

# Notebook plot size.
options(repr.plot.width = 10, repr.plot.height = 15)

# 20-step palette running navy -> white -> red.
colors <- colorRampPalette(c("navy", "white", "red"))(20)

# Write a drawn heatmap to a PDF file.
#
# Arguments:
#   x         Object whose `gtable` element is drawn with grid::grid.draw()
#             (e.g. the value returned by pheatmap()).
#   filename  Path of the PDF file to create.
#   width     Page width passed to pdf(), in inches.
#   height    Page height passed to pdf(), in inches.
#
# Returns the value of dev.off() for the PDF device, as before.
# If drawing fails, the PDF device is still closed before the error propagates,
# so a failed call no longer leaves a stray open device behind.
save_pheatmap_pdf <- function(x, filename, width = 10, height = 15) {
  stopifnot(!missing(x))
  stopifnot(!missing(filename))

  pdf(filename, width = width, height = height)
  pdf_device <- dev.cur()

  # Safety net: only fires when we leave early because of an error.
  device_closed <- FALSE
  on.exit(if (!device_closed) dev.off(pdf_device), add = TRUE)

  grid::grid.newpage()
  grid::grid.draw(x$gtable)

  device_closed <- TRUE
  dev.off(pdf_device)
}

# Draw the heatmap. The selected table is transposed on the way in, so the
# chosen markers become rows and the ROIs become columns. Values are scaled
# per row, and clustering is switched off on both axes so the existing order
# is kept. Each gap index is listed three times (presumably to widen the gap
# that pheatmap draws -- confirm against the rendered figure).
heatmap <- pheatmap(
  t(numerical_Data_sorted),
  color = colors,
  annotation_col = row_annotations,
  scale = "row",
  cluster_rows = FALSE, # no row clustering
  cluster_cols = FALSE, # no column clustering
  gaps_row = rep(23, 3),
  gaps_col = rep(12, 3),
  fontsize_row = 20, # row label font size
  fontsize_col = 14
)
heatmap
# save_pheatmap_pdf(heatmap,"/QRISdata/Q2051/SCC_Paper/resources/data/Data_For_Github_Codes/Onkar_Levi/Individual_panels_fig9_10/NanostringDSP_heatmap_All_Proteins_normalised.pdf")

In [None]:
# `tmm` is not created anywhere in the visible notebook, so running this cell
# in a fresh session stopped with "object 'tmm' not found". When it is absent
# it is rebuilt here from `expression_data` (one column per ROI) as
# TMM-normalised CPM using edgeR, then transposed so ROIs are rows, matching
# the layout of `normalized_data`. An existing `tmm` is left untouched.
# NOTE(review): the original definition of `tmm` is unknown; this
# reconstruction is inferred from the name and the otherwise unused edgeR
# import -- confirm it matches the intended normalisation.
if (!exists("tmm", inherits = FALSE)) {
  tmm <- local({
    dge <- edgeR::DGEList(counts = expression_data)
    dge <- edgeR::calcNormFactors(dge, method = "TMM")
    t(edgeR::cpm(dge))
  })
}

# Pick the 48 marker columns, in display order. Note that this reuses
# (overwrites) `numerical_Data_sorted`.
numerical_Data_sorted <- tmm[
  ,
  c(
    "MART1", "Ki.67", "PD.L2", "CD44", "CD80", "ER.alpha",
    "PanCk", "CTLA4", "GZMB", "PTEN", "NY.ESO.1", "EpCAM",
    "CD127", "CD25", "S6", "Ms.IgG2a", "PD.1", "Her2",
    "GAPDH", "Ms.IgG1", "Histone.H3", "Rb.IgG", "CD56", "CD27",
    "CD66b", "CD163", "Fibronectin", "FAP.alpha", "CD14", "CD68",
    "CD34", "SMA", "PR", "S100B", "CD20", "CD8",
    "HLA.DR", "ICOS", "Bcl.2", "CD3", "CD4", "CD45",
    "FOXP3", "CD45RO", "PD.L1", "CD40", "Beta.2.microglobulin", "CD11c"
  )
]

# Notebook plot size.
options(repr.plot.height = 15, repr.plot.width = 10)

# 20-step palette running navy -> white -> red.
colors <- colorRampPalette(c("navy", "white", "red"))(20)

# Write a drawn heatmap to a PDF file.
#
# Arguments:
#   x         Object whose `gtable` element is drawn with grid::grid.draw()
#             (e.g. the value returned by pheatmap()).
#   filename  Path of the PDF file to create.
#   width     Page width passed to pdf(), in inches.
#   height    Page height passed to pdf(), in inches.
#
# Returns the value of dev.off() for the PDF device, as before.
# If drawing fails, the PDF device is still closed before the error propagates,
# so a failed call no longer leaves a stray open device behind.
save_pheatmap_pdf <- function(x, filename, width = 10, height = 15) {
  stopifnot(!missing(x))
  stopifnot(!missing(filename))

  pdf(filename, width = width, height = height)
  pdf_device <- dev.cur()

  # Safety net: only fires when we leave early because of an error.
  device_closed <- FALSE
  on.exit(if (!device_closed) dev.off(pdf_device), add = TRUE)

  grid::grid.newpage()
  grid::grid.draw(x$gtable)

  device_closed <- TRUE
  dev.off(pdf_device)
}

# Draw the heatmap. The selected table is transposed on the way in, so the
# chosen markers become rows and the ROIs become columns. Values are scaled
# per row, and clustering is switched off on both axes so the existing order
# is kept. Each gap index is listed three times (presumably to widen the gap
# that pheatmap draws -- confirm against the rendered figure).
heatmap <- pheatmap(
  t(numerical_Data_sorted),
  color = colors,
  annotation_col = row_annotations,
  scale = "row",
  cluster_rows = FALSE, # no row clustering
  cluster_cols = FALSE, # no column clustering
  gaps_row = rep(23, 3),
  gaps_col = rep(12, 3),
  fontsize_row = 20, # row label font size
  fontsize_col = 14
)
heatmap
# save_pheatmap_pdf(heatmap,"/scratch/project/stseq/Onkar/BigData/SCC/NanostringDSP_heatmap_All_Proteins.pdf")

In [None]:
# numerical_Data <- numerical_Data[,c("MART1", "Ki.67", "PD.L2", "CD44", "CD80", "ER.alpha", "PanCk", "CTLA4", "GZMB", "PTEN", "NY.ESO.1" ,"EpCAM", "CD127", "CD25", "S6", "Ms.IgG2a", "PD.1", "Her2", "GAPDH", "Ms.IgG1", "Histone.H3", "Rb.IgG", "CD56", "CD27",
#                                     "CD66b", "CD163", "Fibronectin", "FAP.alpha", "CD14", "CD68", "CD34", "SMA", "PR", "S100B", "CD20", "CD8", "HLA.DR", "ICOS", "Bcl.2", "CD3", "CD4", "CD45", "FOXP3", "CD45RO", "PD.L1", "CD40", "Beta.2.microglobulin", "CD11c")]

# options(repr.plot.height=10,repr.plot.width=17.5)
# colors <- colorRampPalette(c("blue", "white", "red"))(50)

# # Plot heatmap
# pheatmap(
#   numerical_Data,
#   annotation_row = row_annotations,
#   color=colors,
#   gaps_row = 12, 
#   gaps_col = 23, 
#   cluster_rows = FALSE,  # Cluster rows
#   cluster_cols = FALSE,   # Cluster columns
#   scale="column",
#   fontsize_row = 14,    # Increase row label font size
#   fontsize_col = 14  )

In [None]:
# update_numerical_Data <- update_numerical_Data[,c("MART1", "Ki.67", "PD.L2", "CD44", "CD80", "ER.alpha", "PanCk", "CTLA4", "GZMB", "PTEN", "NY.ESO.1" ,"EpCAM", "CD127", "CD25", "S6", "Ms.IgG2a", "PD.1", "Her2", "GAPDH", "Ms.IgG1", 
                                    # "CD66b", "CD163", "Fibronectin", "FAP.alpha", "CD14", "CD68", "CD34", "SMA", "PR", "S100B", "CD20", "CD8", "HLA.DR", "ICOS", "Bcl.2", "CD3", "CD4", "CD45", "FOXP3", "Histone.H3", "Rb.IgG", "CD56", "CD27", "CD45RO", "PD.L1", "CD40", "Beta.2.microglobulin", "CD11c")]
# update_numerical_Data <- edgeR::cpm(numerical_Data)

# options(repr.plot.height=10,repr.plot.width=17.5)
# colors <- colorRampPalette(c("blue", "white", "red"))(50)
# # Plot heatmap
# pheatmap(
#   update_numerical_Data,
#   annotation_row = row_annotations,
#   color=colors,
#   gaps_row = 12, 
#   gaps_col = 23, 
#   cluster_rows = FALSE,  # Cluster rows
#   cluster_cols = FALSE,   # Cluster columns
#   scale="row",
#   fontsize_row = 14,    # Increase row label font size
#   fontsize_col = 14  )