In [None]:
# Heatmap and PCA Analysis of miRNA Expression Data

# This notebook visualises miRNA expression data as a heatmap and as PCA plots.
# It reads a tab-delimited file of miRNA expression levels, keeps a selected panel of miRNAs,
# row-scales them to Z-scores for the heatmap, and runs a centred (unscaled) PCA on the same panel.

# Import required libraries
library(ComplexHeatmap)
library(RColorBrewer)
library(circlize)
library(dplyr)
library(ggplot2)
library(ggfortify)
library(factoextra)

In [None]:
#' Load an expression table and extract the miRNA markers of interest
#'
#' Reads a tab-delimited file with `read.delim()`, keeps the rows whose row
#' names appear in `feature_names`, and derives one group label per column
#' from the column names.
#'
#' Row names are whatever `read.delim()` assigns: the first column becomes
#' the row names only when the header line has one field fewer than the data
#' lines. Otherwise rows are numbered and no marker will match.
#'
#' Group labels are the third `_`-separated token of each column name.
#' Columns with fewer than three tokens are labelled "C", and every "BS"
#' inside a label is rewritten to "B".
#'
#' @param input_file Path to the tab-delimited expression file.
#' @param feature_names Character vector of row names to keep. Defaults to
#'   the five-miRNA panel this analysis was written for.
#' @return A list with `selected_rows` (data frame of the matching rows, in
#'   file order) and `sample_group` (character vector with one label per
#'   column of the file).
process_input_data <- function(
    input_file,
    feature_names = c(
      "hsa-miR-136-5p", "hsa-miR-513c-3p", "hsa-miR-514a-5p",
      "hsa-miR-514a-3p", "hsa-miR-507"
    )) {
  info_expr <- read.delim(input_file, check.names = FALSE)

  # A marker that is absent from the file would otherwise vanish silently.
  missing_features <- setdiff(feature_names, rownames(info_expr))
  if (length(missing_features) > 0) {
    warning(
      "miRNAs not found in ", input_file, ": ",
      paste(missing_features, collapse = ", "),
      call. = FALSE
    )
  }

  # drop = FALSE keeps a data frame even when the file has a single sample
  # column; without it the selection collapses to a bare vector.
  is_selected <- rownames(info_expr) %in% feature_names
  selected_rows <- info_expr[is_selected, , drop = FALSE]

  # Third "_"-separated token of each column name is the group label.
  # vapply() guarantees a character vector, including for zero columns.
  name_parts <- strsplit(colnames(info_expr), "_", fixed = TRUE)
  sample_group <- vapply(
    name_parts,
    function(parts) parts[3],
    character(1)
  )
  sample_group[is.na(sample_group)] <- "C"
  sample_group <- gsub("BS", "B", sample_group, fixed = TRUE)

  list(selected_rows = selected_rows, sample_group = sample_group)
}

In [None]:
#' Draw a row-scaled (Z-score) heatmap with a sample-group annotation bar
#'
#' @param processed_data List with `selected_rows` (expression values,
#'   miRNAs in rows and samples in columns) and `sample_group` (one label
#'   per column), as returned by `process_input_data()`.
#' @return The value of `draw()`; the heatmap is drawn on the active
#'   graphics device as a side effect.
generate_heatmap <- function(processed_data) {
  expr_matrix <- data.matrix(processed_data$selected_rows)
  # scale() standardises columns, so transpose to standardise each miRNA
  # (row) across samples, then transpose back.
  z_scores <- t(scale(t(expr_matrix)))

  group_colors <- c("B" = "darkred", "C" = "darkgreen", "V" = "lightblue")

  # The annotation needs a colour for every label that occurs. Labels come
  # from free-form column names, so give any label outside B/C/V a fallback
  # colour instead of letting the annotation fail.
  group_labels <- as.character(processed_data$sample_group)
  unmapped <- setdiff(
    unique(group_labels[!is.na(group_labels)]),
    names(group_colors)
  )
  if (length(unmapped) > 0) {
    fallback <- grDevices::hcl.colors(length(unmapped), palette = "Dark 3")
    group_colors <- c(group_colors, stats::setNames(fallback, unmapped))
  }

  group_annotation <- HeatmapAnnotation(
    Group = processed_data$sample_group,
    col = list(Group = group_colors)
  )

  # Rows keep their input order; only columns (samples) are clustered.
  heatmap <- Heatmap(
    z_scores,
    show_row_names = TRUE,
    cluster_rows = FALSE,
    top_annotation = group_annotation,
    name = "Z-score"
  )

  draw(
    heatmap,
    merge_legend = TRUE,
    heatmap_legend_side = "left",
    annotation_legend_side = "left"
  )
}

In [None]:
#' Build PCA plots of samples and of miRNA loadings
#'
#' Runs a centred, unscaled PCA with samples as observations and the
#' selected miRNAs as variables, then builds two factoextra plots.
#'
#' @param processed_data List with `selected_rows` (expression values,
#'   miRNAs in rows and samples in columns) and `sample_group` (one label
#'   per sample), as returned by `process_input_data()`.
#' @return A list of two plots: `pca_ind` (samples coloured by group, with
#'   80% ellipses) and `pca_var` (variables coloured by contribution).
#'   Positional access with `[[1]]` and `[[2]]` works as before.
generate_pca_plot <- function(processed_data) {
  # prcomp() treats rows as observations, so put samples in rows.
  sample_by_mirna <- t(processed_data$selected_rows)
  # `scale.` is the real argument name; `scale` only worked through
  # partial matching.
  pca_fit <- prcomp(sample_by_mirna, center = TRUE, scale. = FALSE)

  # PCA of individuals
  pca_ind <- fviz_pca_ind(
    pca_fit,
    geom.ind = c("point", "text"),
    col.ind = processed_data$sample_group,
    addEllipses = TRUE,
    ellipse.level = 0.8,
    legend.title = "Groups"
  ) +
    labs(title = "PCA with miRNA markers", x = "PC1", y = "PC2") +
    theme(text = element_text(size = 20))

  # PCA of variables. The variable layer is controlled by `geom.var`, not
  # `geom.ind`; state the arrow-and-label geometry (factoextra's documented
  # default) explicitly so the call matches what is drawn.
  pca_var <- fviz_pca_var(
    pca_fit,
    geom.var = c("arrow", "text"),
    col.var = "contrib",
    gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07")
  ) +
    labs(title = "", x = "PC1", y = "PC2") +
    theme(text = element_text(size = 18))

  list(pca_ind = pca_ind, pca_var = pca_var)
}

In [None]:
# Example run: load the expression table, then draw the heatmap and PCA plots

# Location of the expression table (point this at your own file)
input_file <- "miRNA_expression_data.txt"

# Parse the file once and reuse the result for every figure
processed_data <- process_input_data(input_file)
generate_heatmap(processed_data)
pca_plots <- generate_pca_plot(processed_data)

# Render the PCA plots in order: individuals first, then variables
invisible(lapply(pca_plots, print))