In [1]:
# Volcano Plot Analysis for Differential Expression Results

# This notebook generates volcano plots for differential expression analysis.
# It reads upregulated and downregulated gene lists, labels significant genes,
# highlights top upregulated/downregulated genes, and creates volcano plots for each comparison.

# Import required libraries
library(tidyverse)
library(RColorBrewer)
library(ggrepel)
library(cowplot)
library(readxl)

-- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
v dplyr     1.1.4     v readr     2.1.5
v forcats   1.0.0     v stringr   1.5.1
v ggplot2   3.5.0     v tibble    3.2.1
v lubridate 1.9.3     v tidyr     1.3.1
v purrr     1.0.2     
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Attaching package: 'cowplot'


The following object is masked from 'package:lubridate':

    stamp




In [None]:
# Function to read, merge and annotate gene list data
read_and_prepare_data <- function(up_file, down_file) {
  gene_list_up <- read_excel(up_file)
  gene_list_down <- read_excel(down_file)
  gene_list <- rbind(gene_list_up, gene_list_down)
  
  # Annotate differential expression
  gene_list$diffexpressed <- "NO"
  gene_list$diffexpressed[gene_list$logFC > 1 & gene_list$P.Value < 0.05] <- "UP"
  gene_list$diffexpressed[gene_list$logFC < -1 & gene_list$P.Value < 0.05] <- "DOWN"
  
  # Filter for significant genes
  filtered_genes <- gene_list[gene_list$P.Value < 0.05, ]
  filtered_genes <- filtered_genes[order(filtered_genes$logFC), ]
  
  # Identify top 3 upregulated and downregulated genes
  top_downregulated <- head(filtered_genes, 3)
  top_upregulated <- tail(filtered_genes, 3)
  top_genes <- rbind(top_downregulated, top_upregulated)
  
  # Label top genes
  gene_list$delabel <- ifelse(gene_list$ID %in% top_genes$ID, gene_list$ID, NA)
  
  return(gene_list)
}

In [None]:
# Function to generate a volcano plot
generate_volcano_plot <- function(gene_data, title) {
  ggplot(data = gene_data, aes(x = logFC, y = -log10(P.Value), col = diffexpressed, label = delabel)) +
    geom_vline(xintercept = c(-1, 1), col = "gray", linetype = 'dashed') +
    geom_hline(yintercept = -log10(0.05), col = "gray", linetype = 'dashed') +
    geom_point(size = 4) +
    scale_color_manual(values = c("#00AFBB", "grey", "#bb0c00"),
                       labels = c("Downregulated", "Not significant", "Upregulated")) +
    coord_cartesian(ylim = c(0, 6.5), xlim = c(-5, 5)) +
    labs(color = '', x = expression("log"[2]*"FC"), y = expression("-log"[10]*"p-value")) +
    ggtitle(title) +
    theme_minimal(base_size = 24) +
    theme(plot.title = element_text(face = 'bold', hjust = 0.5, size = 32)) +
    geom_text_repel(max.overlaps = Inf, size = 8, color = 'black')
}

In [None]:
# Example: Read data and generate a volcano plot

# Input files (adjust paths to your files)
input_bc_up <- "B_vs_C_UP.xlsx"
input_bc_down <- "B_vs_C_DOWN.xlsx"

# Prepare data and generate plot
gene_list_BC <- read_and_prepare_data(input_bc_up, input_bc_down)
plot_BC <- generate_volcano_plot(gene_list_BC, "B vs C")

# Display the plot
print(plot_BC)
