In [None]:
# Read the PEER covariate table: tab-delimited, first line is the header,
# strings stay as character vectors.
covars <- read.table(
  file = "param15.covars_out.txt",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)

# Preview the first rows.
head(covars)

In [None]:
# Build per-sample metadata from the covariate column names (every column of
# `covars` except the first).
sample_names <- colnames(covars[-1])

# Split each name on "_" once and reuse the tokens for both derived fields.
name_parts <- strsplit(sample_names, "_")

metadata <- data.frame(samples = sample_names)

# groups: 2nd and 3rd tokens joined by "_". vapply() guarantees a character
# vector even when there are no samples.
metadata$groups <- vapply(
  name_parts,
  function(x) paste(x[2], x[3], sep = "_"),
  character(1)
)

# type: 5th token. paste() is kept so that a missing token still becomes the
# string "NA", exactly as before.
metadata$type <- vapply(name_parts, function(x) paste(x[5]), character(1))

head(metadata)

In [None]:
# Label the rows with the `id` column, then turn every column except the
# first into a numeric matrix.
rownames(covars) <- covars$id
covars_matrix <- data.matrix(covars[, -1, drop = FALSE])

head(covars_matrix)

In [None]:
colnames_matrix <- colnames(covars_matrix)

# Column indices for each genotype within the MOUSE and HUMAN samples.
wt_mouse_cols <- grep("WT.*MOUSE", colnames_matrix)
het_mouse_cols <- grep("Het.*MOUSE", colnames_matrix)
homo_mouse_cols <- grep("Homo.*MOUSE", colnames_matrix)

wt_human_cols <- grep("WT.*HUMAN", colnames_matrix)
het_human_cols <- grep("Het.*HUMAN", colnames_matrix)
homo_human_cols <- grep("Homo.*HUMAN", colnames_matrix)

# Row-wise mean over each group of columns. drop = FALSE keeps a one-column
# selection as a matrix; without it a group with exactly one matching column
# collapses to a plain vector and rowMeans() stops with an error.
wt_mouse_mean <- rowMeans(covars_matrix[, wt_mouse_cols, drop = FALSE])
het_mouse_mean <- rowMeans(covars_matrix[, het_mouse_cols, drop = FALSE])
homo_mouse_mean <- rowMeans(covars_matrix[, homo_mouse_cols, drop = FALSE])

wt_human_mean <- rowMeans(covars_matrix[, wt_human_cols, drop = FALSE])
het_human_mean <- rowMeans(covars_matrix[, het_human_cols, drop = FALSE])
homo_human_mean <- rowMeans(covars_matrix[, homo_human_cols, drop = FALSE])

# Data frame with the group means for both species.
# Alternative LRP1 labelling, kept as a real comment (it used to be a bare
# string literal, which is evaluated and echoed by the notebook):
# mean_expression <- data.frame(
#   LRP1_WT_MOUSE = wt_mouse_mean,
#   LRP1_Homo_MOUSE = homo_mouse_mean,
#   LRP1_Het_MOUSE = het_mouse_mean,
#   LRP1_WT_HUMAN = wt_human_mean,
#   LRP1_Homo_HUMAN = homo_human_mean,
#   LRP1_Het_HUMAN = het_human_mean
# )

mean_expression <- data.frame(
  NRXN1_WT_MOUSE = wt_mouse_mean,
  NRXN1_Homo_MOUSE = homo_mouse_mean,
  NRXN1_Het_MOUSE = het_mouse_mean,
  NRXN1_WT_HUMAN = wt_human_mean,
  NRXN1_Homo_HUMAN = homo_human_mean,
  NRXN1_Het_HUMAN = het_human_mean
)

# NOTE: the name `matrix` shadows base::matrix as a variable; it is kept
# because a later cell reads this object under that name.
matrix <- data.matrix(mean_expression)
head(matrix)

In [None]:
# Column labels of the averaged matrix, in plotting order.
# LRP1 variant:
# sample_values <- c("LRP1_WT_MOUSE", "LRP1_Het_MOUSE", "LRP1_Homo_MOUSE",
#                    "LRP1_WT_HUMAN", "LRP1_Het_HUMAN", "LRP1_Homo_HUMAN")
sample_values <- c(
  "NRXN1_WT_MOUSE", "NRXN1_Het_MOUSE", "NRXN1_Homo_MOUSE",
  "NRXN1_WT_HUMAN", "NRXN1_Het_HUMAN", "NRXN1_Homo_HUMAN"
)

# Group: "LRP1" when the label contains it, otherwise "NRXN1".
group_values <- rep("NRXN1", length(sample_values))
group_values[grepl("LRP1", sample_values)] <- "LRP1"

# Species column: MOUSE labels are tagged "ASTRO", everything else "NEURON".
species_values <- rep("NEURON", length(sample_values))
species_values[grepl("MOUSE", sample_values)] <- "ASTRO"

# Genotype: assigned from lowest to highest priority so that WT wins over
# Het, which wins over Homo; labels matching none stay "Unknown".
genotype_values <- rep("Unknown", length(sample_values))
genotype_values[grepl("Homo", sample_values)] <- "Homo"
genotype_values[grepl("Het", sample_values)] <- "Het"
genotype_values[grepl("WT", sample_values)] <- "WT"

metadata_df <- data.frame(
  Sample = sample_values,
  Group = group_values,
  Genotype = genotype_values,
  Species = species_values
)
print(metadata_df)

In [None]:
# One rainbow colour per distinct genotype (not used by the lookup below).
group_colors <- rainbow(length(unique(metadata_df$Genotype)))

custom_colors <- c("WT" = "#4B0082",   # Dark Purple (Indigo)
                   "Het" = "#800080",  # Purple
                   "Homo" = "#D8BFD8") # Light Purple (Thistle)

# custom_colors is already named by genotype, so it is the mapping itself.
color_mapping <- custom_colors

# Look colours up by genotype *name*. as.character() matters when Genotype is
# a factor: indexing a named vector with a factor uses the integer level
# codes, which would pick colours by level order instead of by label.
col_side_colors <- color_mapping[as.character(metadata_df$Genotype)]

# Put the averaged matrix columns in the order given by sample_values.
ordered_matrix <- matrix[, sample_values]
ordered_matrix

In [None]:
# Heatmap of the per-genotype averages for mouse and human samples.
# Rows and columns keep their given order (no clustering, no dendrogram).
library(gplots)
heatmap.2(
  ordered_matrix,
  Rowv = FALSE,
  Colv = FALSE,
  dendrogram = "none",
  trace = "none",
  ColSideColors = col_side_colors,
  margins = c(15, 10)
)

In [None]:
# Prepare the input for the individual-donor heatmap.

exp_matrix <- covars[-1]

# Remove the alphabetic token that sits right before the trailing species tag,
# so columns that differ only in that token share one base name.
base_names <- gsub("_[a-zA-Z]+_(HUMAN|MOUSE)$", "_\\1", colnames(exp_matrix))
unique_names <- unique(base_names)

# Average the replicate columns that share a base name (row-wise mean).
collapsed_matrix <- as.data.frame(
  sapply(unique_names, function(base) {
    rowMeans(exp_matrix[, base_names == base, drop = FALSE])
  })
)
colnames(collapsed_matrix) <- unique_names

collapsed_matrix

In [None]:
col_order <- colnames(collapsed_matrix)

# Genotype from the column name: WT takes priority over Het; anything that
# matches neither is labelled Homo.
genotype <- rep("Homo", length(col_order))
genotype[grepl("Het", col_order)] <- "Het"
genotype[grepl("WT", col_order)] <- "WT"

# Species from the column name: MOUSE if present, otherwise HUMAN.
species <- rep("HUMAN", length(col_order))
species[grepl("MOUSE", col_order)] <- "MOUSE"

ordering_key <- paste(genotype, species, sep = "_")

# Target column order: mouse block first, then human; WT -> Het -> Homo
# inside each block.
desired_order <- c(
  "WT_MOUSE", "Het_MOUSE", "Homo_MOUSE",
  "WT_HUMAN", "Het_HUMAN", "Homo_HUMAN"
)
ordering_factor <- factor(ordering_key, levels = desired_order)

# Reorder the columns by that key and convert to a matrix.
collapsed_matrix_ordered <- as.matrix(
  collapsed_matrix[, order(ordering_factor)]
)

collapsed_matrix_ordered

In [None]:
sample_names <- colnames(collapsed_matrix_ordered)

# Derive donor, gene, genotype and species from each sample name by regex:
#   Donor    - text before the first "_"
#   Gene     - text between the first and second "_"
#   Genotype - token right before the trailing HUMAN/MOUSE tag
#   Species  - the trailing HUMAN/MOUSE tag
metadata_new <- data.frame(Sample = sample_names, stringsAsFactors = FALSE)
metadata_new$Donor <- sub("^([^_]+)_.*", "\\1", sample_names)
metadata_new$Gene <- sub("^([^_]+)_([^_]+)_.*", "\\2", sample_names)
metadata_new$Genotype <- sub(".*_([^_]+)_(HUMAN|MOUSE)$", "\\1", sample_names)
metadata_new$Species <- sub(".*_(HUMAN|MOUSE)$", "\\1", sample_names)

head(metadata_new)

In [13]:
# Genotype -> colour lookup (Indigo / Purple / Thistle).
custom_colors <- c(WT = "#4B0082", Het = "#800080", Homo = "#D8BFD8")

# Genotype of every sample, in the order of metadata_new$Sample, then the
# matching side colour for each heatmap column.
sample_names <- metadata_new$Sample
genotypes <- with(metadata_new, Genotype[match(sample_names, Sample)])
col_side_colors <- custom_colors[genotypes]

In [None]:
# Heatmap of the PEER results per donor and cell type.
# Row and column clustering are both disabled, so the given order is kept.
heatmap.2(
  collapsed_matrix_ordered,
  Rowv = FALSE,
  Colv = FALSE,
  dendrogram = "none",
  trace = "none",
  ColSideColors = col_side_colors,
  margins = c(12, 15)
)

### Significance testing

In [None]:
library(dplyr)
library(tidyr)

# Keep only the sample columns listed in the metadata.
covars <- covars[, metadata$samples]

# Long format: one row per (row name, sample) pair. Row names go into a PEER
# column, and the per-sample metadata (groups, type) is joined on by name.
covars_long <- as.data.frame(covars) %>%
  tibble::rownames_to_column(var = "PEER") %>%
  pivot_longer(cols = -PEER, names_to = "Sample", values_to = "Value") %>%
  left_join(metadata, by = c("Sample" = "samples"))

In [None]:
library(ggbeeswarm)
library(ggsignif)

# Genotype colours shared by all plots.
custom_colors <- c(WT = "#4B0082", Het = "#800080", Homo = "#D8BFD8")

# Pairwise genotype comparisons annotated on each plot.
# (Group-prefixed variant: c("LRP1_Het", "LRP1_WT"), c("LRP1_Het", "LRP1_Homo"),
#  c("LRP1_WT", "LRP1_Homo").)
comparisons_list <- list(
  c("Het", "Homo"),
  c("Het", "WT"),
  c("Homo", "WT")
)

# One beeswarm plot per (PEER factor, cell type) combination, stored under
# the key "<factor>_<type>".
plots_list <- list()

for (peer_factor in unique(covars_long$PEER)) {
  for (species_type in unique(covars_long$type)) {
    current_data <- covars_long %>%
      filter(PEER == peer_factor, type == species_type)

    # Nothing to draw for this combination.
    if (nrow(current_data) == 0) next

    # Genotype is the text after the last "_" of `groups`, with the levels
    # ordered WT -> Het -> Homo for the x axis.
    current_data$genotype <- factor(
      sub(".*_", "", current_data$groups),
      levels = c("WT", "Het", "Homo")
    )

    plot_key <- paste(peer_factor, species_type, sep = "_")
    plot_title <- paste(peer_factor, "-", species_type)

    p <- ggplot(current_data, aes(x = genotype, y = Value, color = genotype)) +
      geom_beeswarm(size = 1.5) +
      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
      labs(x = "Group", y = "PEER Factor Value", title = plot_title) +
      scale_color_manual(values = custom_colors) +
      geom_signif(
        comparisons = comparisons_list,
        map_signif_level = TRUE,
        y_position = c(1, 1.2, 1.4)
      )

    plots_list[[plot_key]] <- p
  }
}

# Render every stored plot.
for (plot in plots_list) {
  print(plot)
}

## Prep GSEA input file ##

In [None]:
library(readr)

# PEER weights: the file is read without a header, so the first row carries
# the column names. Promote it to names, drop it, then re-infer column types.
# Reading the header as data leaves every column as character otherwise.
weights <- read.table("param15.weights_out.txt", sep = "\t")
colnames(weights) <- as.character(unlist(weights[1, ], use.names = FALSE))
weights <- weights[-1, , drop = FALSE]
weights[] <- lapply(weights, type.convert, as.is = TRUE)

# Count matrix: same header handling; only its first column is used below.
counts <- read.table("counts_matrix.txt", sep = "\t")
colnames(counts) <- as.character(unlist(counts[1, ], use.names = FALSE))
counts <- counts[-1, , drop = FALSE]

# Gene labels are attached purely by row position, so the two tables must
# have the same number of rows.
# NOTE(review): this also assumes both files list genes in the same order -
# confirm against how the weights file was produced.
stopifnot(nrow(weights) == nrow(counts))
weights$GENE <- as.character(counts[, 1])

# Move GENE to the front, keeping the remaining columns in their order.
weights <- weights[c("GENE", setdiff(names(weights), "GENE"))]
weights

## GSEA results analysis

In [26]:
library(dplyr)
library(gplots)
library(viridis)

# GSEA result table for this PEER factor; keep pathways with padj < 0.05.
# which() drops rows whose padj is NA instead of turning them into all-NA
# rows that would break the loop below.
peer_gsea_genes <- read.table("PEER_8_output.txt", header = TRUE)
peer_gsea_genes <- peer_gsea_genes[which(peer_gsea_genes$padj < 0.05), ]

PEER <- "PEER_8"

# Read the merged table once, outside the loop. Gene symbols are upper-cased
# (mouse data) and used as row names; the first column is then dropped.
# NOTE(review): assumes `gene` is the first column of the CSV - confirm.
merged_df <- read.table("merged_counts_and_res.csv", sep = ',', header = TRUE)
merged_df$gene <- toupper(merged_df$gene)
rownames(merged_df) <- merged_df$gene
merged_df <- merged_df[-1]

# NOTE(review): the first six remaining columns are dropped here and in the
# loop; presumably they are non-sample result columns - confirm.
merged_df_filtered_all <- merged_df[, -c(1:6)]

genotype_palette <- c("WT" = "#4B0082", "Het" = "#800080", "Homo" = "#D8BFD8")

# One pair of heatmaps (+ CSVs) per significant pathway.
for (i in seq_len(nrow(peer_gsea_genes))) {

  gene_string <- as.character(peer_gsea_genes$leadingEdge[i])
  title <- as.character(peer_gsea_genes$pathway[i])
  safe_title <- gsub("[^A-Za-z0-9_]", "_", title)
  gene_vector <- trimws(unlist(strsplit(gene_string, split = ",")))

  # Rows for the leading-edge genes, sample columns only.
  merged_df_filtered <- merged_df[rownames(merged_df) %in% gene_vector, ]
  merged_df_filtered <- merged_df_filtered[, -c(1:6), drop = FALSE]

  # heatmap.2() needs at least a 2 x 2 matrix; skip pathways that cannot be
  # drawn instead of aborting the whole run.
  if (nrow(merged_df_filtered) < 2) {
    message("Skipping '", title, "': fewer than 2 leading-edge genes found")
    next
  }

  #### 1. Combined genotype version ####

  # Columns per genotype, then the row-wise mean of each group.
  wt_cols <- grep("WT", colnames(merged_df_filtered))
  het_cols <- grep("Het", colnames(merged_df_filtered))
  homo_cols <- grep("Homo", colnames(merged_df_filtered))

  wt_mean <- rowMeans(merged_df_filtered[, wt_cols, drop = FALSE])
  het_mean <- rowMeans(merged_df_filtered[, het_cols, drop = FALSE])
  homo_mean <- rowMeans(merged_df_filtered[, homo_cols, drop = FALSE])

  # Build the table from the means computed above. This assignment used to be
  # commented out, so the heatmap and CSV silently used whatever
  # `mean_expression` was left in the session by another cell.
  # (For the LRP1 data set use LRP1_WT / LRP1_Het / LRP1_Homo as names.)
  mean_expression <- data.frame(
    NRXN1_WT = wt_mean,
    NRXN1_Het = het_mean,
    NRXN1_Homo = homo_mean
  )

  mean_matrix <- data.matrix(mean_expression)
  # Row-wise z-score: scale() works on columns, hence the double transpose.
  matrix_z_1 <- t(scale(t(mean_matrix)))

  # Column order is WT, Het, Homo, matching the palette order.
  col_side_colors_merged <- unname(genotype_palette)

  heatmap_file_1 <- paste0("/PEER/",PEER,"/", PEER, "_", safe_title, "_genes_heatmap.png")
  csv_file_1 <- sub(".png$", ".csv", heatmap_file_1)
  dir.create(dirname(heatmap_file_1), recursive = TRUE, showWarnings = FALSE)

  # The CSV holds the un-scaled group means.
  write.table(
    cbind(Gene = rownames(mean_matrix), as.data.frame(mean_matrix)),
    file = csv_file_1,
    sep = ",",
    row.names = FALSE,
    col.names = TRUE,
    quote = FALSE
  )
  png(heatmap_file_1, width = 1200, height = 1200, res = 150)
  heatmap.2(matrix_z_1,
            ColSideColors = col_side_colors_merged,
            trace = 'none',
            margins = c(15, 15),
            Rowv = TRUE,
            Colv = FALSE,
            dendrogram = 'none',
            main = title,
            col = viridis(100))
  dev.off()

  #### 2. Individual donor version ####

  # Strip a trailing single-letter replicate suffix ("_a", "_b", ...) and
  # average the columns that share the remaining name.
  base_names <- sub("_[a-z]$", "", colnames(merged_df_filtered))
  collapsed_matrix <- vapply(
    unique(base_names),
    function(name) {
      rowMeans(merged_df_filtered[, base_names == name, drop = FALSE])
    },
    numeric(nrow(merged_df_filtered))
  )
  colnames(collapsed_matrix) <- unique(base_names)
  collapsed_matrix <- as.matrix(collapsed_matrix)

  # Sample metadata parsed from the collapsed column names. The donor prefix
  # accepts both ML and CW identifiers, as the WT-normalised cell does.
  metadata_new <- data.frame(
    Sample = colnames(collapsed_matrix),
    Donor = sub("^((ML|CW)[0-9.]+)_.*", "\\1", colnames(collapsed_matrix)),
    Gene = sub("^(ML|CW)[0-9.]+_([^_]+)_.*", "\\2", colnames(collapsed_matrix)),
    Genotype = sub(".*_(WT|Het|Homo)$", "\\1", colnames(collapsed_matrix)),
    stringsAsFactors = FALSE
  )

  metadata_new$Genotype <- factor(metadata_new$Genotype, levels = c("WT", "Het", "Homo"))
  metadata_ordered <- metadata_new[order(metadata_new$Genotype, metadata_new$Donor), ]

  # Reorder the matrix columns to match the metadata (WT -> Het -> Homo,
  # donors sorted inside each genotype).
  collapsed_matrix_ordered <- collapsed_matrix[, metadata_ordered$Sample, drop = FALSE]

  # Colour by genotype *label*; as.character() avoids indexing by factor code.
  col_side_colors <- genotype_palette[as.character(metadata_ordered$Genotype)]

  # Z-score transform by row.
  matrix_z_2 <- t(scale(t(collapsed_matrix_ordered)))

  heatmap_file_2 <- paste0("/PEER/",PEER,"/", PEER, "_", safe_title, "_individual_donors_genes_heatmap.png")
  csv_file_2 <- sub(".png$", ".csv", heatmap_file_2)
  dir.create(dirname(heatmap_file_2), recursive = TRUE, showWarnings = FALSE)

  write.table(
    cbind(Gene = rownames(collapsed_matrix_ordered), as.data.frame(collapsed_matrix_ordered)),
    file = csv_file_2,
    sep = ",",
    row.names = FALSE,
    col.names = TRUE,
    quote = FALSE
  )
  png(heatmap_file_2, width = 1200, height = 1200, res = 150)
  heatmap.2(matrix_z_2,
            ColSideColors = col_side_colors,
            trace = 'none',
            margins = c(15, 15),
            Rowv = TRUE,
            Colv = FALSE,
            dendrogram = 'none',
            main = title,
            col = viridis(100))
  dev.off()
}

### GSEA Analysis Normalized by WT

In [40]:
library(dplyr)
library(gplots)
library(viridis)

# GSEA result table for this PEER factor; keep pathways with padj < 0.05.
# which() drops rows whose padj is NA instead of producing all-NA rows.
peer_gsea_genes <- read.table("PEER_2_output.txt", header = TRUE)
peer_gsea_genes <- peer_gsea_genes[which(peer_gsea_genes$padj < 0.05), ]

PEER <- "PEER_2"

# Merged table: gene symbols are upper-cased (mouse data) and used as row
# names; the first column is then dropped.
# NOTE(review): assumes `gene` is the first column of the CSV - confirm.
merged_df <- read.table("merged_counts_and_res.csv", sep = ',', header = TRUE)
merged_df$gene <- toupper(merged_df$gene)
rownames(merged_df) <- merged_df$gene
merged_df <- merged_df[-1]

# NOTE(review): the first six remaining columns are dropped here and in the
# loop; presumably they are non-sample result columns - confirm.
merged_df_filtered_all <- merged_df[, -c(1:6)]

genotype_palette <- c("WT" = "#4B0082", "Het" = "#800080", "Homo" = "#D8BFD8")

# One pair of WT-normalised heatmaps (+ CSVs) per significant pathway.
for (i in seq_len(nrow(peer_gsea_genes))) {

  gene_string <- as.character(peer_gsea_genes$leadingEdge[i])
  title <- as.character(peer_gsea_genes$pathway[i])
  safe_title <- gsub("[^A-Za-z0-9_]", "_", title)

  gene_vector <- trimws(unlist(strsplit(gene_string, split = ",")))

  # Rows for the leading-edge genes, sample columns only.
  merged_df_filtered <- merged_df[rownames(merged_df) %in% gene_vector, ]
  merged_df_filtered <- merged_df_filtered[, -c(1:6), drop = FALSE]

  # heatmap.2() needs at least a 2 x 2 matrix; skip pathways that cannot be
  # drawn instead of aborting the whole run.
  if (nrow(merged_df_filtered) < 2) {
    message("Skipping '", title, "': fewer than 2 leading-edge genes found")
    next
  }

  #### 1. Combined genotype version ####

  # Columns per genotype, then the row-wise mean of each group.
  wt_cols <- grep("WT", colnames(merged_df_filtered))
  het_cols <- grep("Het", colnames(merged_df_filtered))
  homo_cols <- grep("Homo", colnames(merged_df_filtered))

  wt_mean <- rowMeans(merged_df_filtered[, wt_cols, drop = FALSE])
  het_mean <- rowMeans(merged_df_filtered[, het_cols, drop = FALSE])
  homo_mean <- rowMeans(merged_df_filtered[, homo_cols, drop = FALSE])

  # Het and Homo expressed as difference from the WT mean.
  norm_het <- het_mean - wt_mean
  norm_homo <- homo_mean - wt_mean

  # NOTE(review): the WT column holds the raw WT mean while Het/Homo hold
  # differences from it; the per-donor version below uses 0 for WT. Confirm
  # that mixing the two scales before z-scoring is intended.
  mean_expression <- data.frame(NRXN1_WT = wt_mean, NRXN1_Het = norm_het, NRXN1_Homo = norm_homo)

  mean_matrix <- data.matrix(mean_expression)
  # Row-wise z-score: scale() works on columns, hence the double transpose.
  matrix_z_1 <- t(scale(t(mean_matrix)))

  # Column order is WT, Het, Homo, matching the palette order.
  col_side_colors_merged <- unname(genotype_palette)

  heatmap_file_1 <- paste0("/PEER/",PEER,"_WT/", PEER, "_", safe_title, "_genes_heatmap.png")
  csv_file_1 <- sub(".png$", ".csv", heatmap_file_1)

  # Create the output folder derived from the file path. The previous check
  # used `output_dir`, which is never defined, and stopped the cell.
  dir.create(dirname(heatmap_file_1), recursive = TRUE, showWarnings = FALSE)

  write.table(
    cbind(Gene = rownames(mean_matrix), as.data.frame(mean_matrix)),
    file = csv_file_1,
    sep = ",",
    row.names = FALSE,
    col.names = TRUE,
    quote = FALSE
  )

  png(heatmap_file_1, width = 1200, height = 1200, res = 150)
  heatmap.2(matrix_z_1,
            ColSideColors = col_side_colors_merged,
            trace = 'none',
            margins = c(15, 15),
            Rowv = TRUE,
            Colv = FALSE,
            dendrogram = 'none',
            main = title,
            col = viridis(100))
  dev.off()

  #### 2. Individual donor version ####

  sample_cols <- colnames(merged_df_filtered)
  base_names <- sub("_[a-z]$", "", sample_cols)

  # Donor id = leading ML/CW identifier of each column name.
  donors <- unique(sub("^((ML|CW)[0-9.]+)_.*", "\\1", sample_cols))
  genes <- rownames(merged_df_filtered)

  # Per donor: mean of each genotype's columns, expressed relative to that
  # donor's WT mean (so the WT column is 0 by construction).
  donor_blocks <- lapply(donors, function(donor) {
    # Literal match on "<donor>_" so that e.g. ML1 does not also pick up
    # ML10_...; a regex prefix match would, and would treat "." as wildcard.
    is_donor_col <- sample_cols == donor |
      startsWith(sample_cols, paste0(donor, "_"))
    donor_data <- merged_df_filtered[, is_donor_col, drop = FALSE]

    genotype_mean <- function(label) {
      rowMeans(donor_data[, grep(label, colnames(donor_data)), drop = FALSE])
    }
    wt <- genotype_mean("WT")
    het <- genotype_mean("Het")
    homo <- genotype_mean("Homo")

    # A genotype with no columns for this donor yields an all-NaN column,
    # which is filtered out further down.
    block <- cbind(rep(0, length(wt)), het - wt, homo - wt)
    colnames(block) <- paste0(donor, c("_WT", "_Het", "_Homo"))
    block
  })

  normalized_matrix <- do.call(cbind, donor_blocks)
  rownames(normalized_matrix) <- genes

  # Column names here are "<donor>_<genotype>", so donor and genotype are
  # the only fields that can be parsed from them (no gene token is present).
  metadata_new <- data.frame(
    Sample = colnames(normalized_matrix),
    Donor = sub("_(WT|Het|Homo)$", "", colnames(normalized_matrix)),
    Genotype = sub(".*_(WT|Het|Homo)$", "\\1", colnames(normalized_matrix)),
    stringsAsFactors = FALSE
  )

  # Keep columns that have at least one non-missing value.
  valid_cols <- colSums(is.na(normalized_matrix)) < nrow(normalized_matrix)

  metadata_filtered <- metadata_new[metadata_new$Sample %in% names(valid_cols[valid_cols]), ]
  metadata_ordered <- metadata_filtered %>%
    mutate(Genotype = factor(Genotype, levels = c("WT", "Het", "Homo"))) %>%
    arrange(Genotype, Donor)

  if (nrow(metadata_ordered) < 2) {
    message("Skipping donor heatmap for '", title, "': fewer than 2 usable columns")
    next
  }

  collapsed_matrix_ordered <- normalized_matrix[, metadata_ordered$Sample, drop = FALSE]
  # Colour by genotype *label*; as.character() avoids indexing by factor code.
  col_side_colors <- genotype_palette[as.character(metadata_ordered$Genotype)]
  matrix_z_2 <- t(scale(t(collapsed_matrix_ordered)))

  heatmap_file_2 <- paste0("/PEER/",PEER,"_WT/", PEER, "_", safe_title, "_individual_donors_genes_heatmap.png")
  csv_file_2 <- sub(".png$", ".csv", heatmap_file_2)
  dir.create(dirname(heatmap_file_2), recursive = TRUE, showWarnings = FALSE)

  write.table(
    cbind(Gene = rownames(collapsed_matrix_ordered), as.data.frame(collapsed_matrix_ordered)),
    file = csv_file_2,
    sep = ",",
    row.names = FALSE,
    col.names = TRUE,
    quote = FALSE
  )
  png(heatmap_file_2, width = 1200, height = 1200, res = 150)
  heatmap.2(matrix_z_2,
            ColSideColors = col_side_colors,
            trace = 'none',
            margins = c(15, 15),
            Rowv = TRUE,
            Colv = FALSE,
            dendrogram = 'none',
            main = title,
            col = viridis(100))
  dev.off()
}