# **Survival Analysis Pipeline using Kaplan-Meier and Log-rank test**

Library and data preparation

In [None]:
# Load necessary libraries
library(survival)
library(ggplot2)
library(survminer)
library(cowplot)

# Read the merged CSV into a data frame
file_path <- 'combined_data.csv'
data <- read.csv(file = file_path)

Define genes of interest

In [None]:

# Genes to analyse; the order of this vector is the order in which they are
# processed further down
genes_of_interest <- c(
  'ENO2', 'ENO3', 'GCK', 'HIF1A', 'HK2',
  'PFKFB3', 'PFKP', 'PKM', 'SLC2A1', 'SLC2A2'
)


Handling the time to event and the event

In [None]:

# Convert 'days_to_last_follow_up' and 'days_to_death' to numeric. Going through
# as.character() first also handles columns that were read as factors; values
# that are not numbers become NA (R emits a coercion warning when that happens).
data$days_to_last_follow_up <- as.numeric(as.character(data$days_to_last_follow_up))
data$days_to_death <- as.numeric(as.character(data$days_to_death))

# A patient is treated as deceased exactly when a death time is recorded.
# NOTE(review): vital status is inferred from 'days_to_death' alone; a deceased
# patient with a missing death time would be counted as censored - confirm this
# matches the input data.
has_death_time <- !is.na(data$days_to_death)

# Time to event: days to death when recorded, otherwise days to last follow-up
data$time_to_event <- ifelse(has_death_time, data$days_to_death, data$days_to_last_follow_up)

# Event indicator: 1 = death observed, 0 = censored
data$event <- as.numeric(has_death_time)

# Make 'HIF1A' a factor with levels "H_E" and "N_A"; any other value becomes NA
# and its row is dropped by na.omit() below
data$HIF1A <- factor(data$HIF1A, levels = c("H_E", "N_A"))

# Keep only the analysis columns and drop rows with an NA in any of them.
# 'HIF1A' is already part of genes_of_interest, so unique() prevents it from
# being selected twice (which would add a redundant 'HIF1A.1' column).
analysis_columns <- unique(c('time_to_event', 'event', genes_of_interest, 'HIF1A'))
data <- na.omit(data[, analysis_columns])


**Plotting**

In [None]:

# Build one Kaplan-Meier panel per gene in genes_of_interest and collect the
# resulting ggsurvplot objects in `plots`, keyed by gene name.
plots <- list()

# Panel tags ("a", "b", ...), one per gene; seq_along() is safe for an empty
# gene list, unlike 1:length()
subplot_labels <- letters[seq_along(genes_of_interest)]

# The survival response only depends on the rows of `data`, and the loop below
# only adds or re-levels columns, so it is built once up front
surv_object <- Surv(time = data$time_to_event, event = data$event)

# Theme shared by every panel
panel_theme <- theme_minimal() + theme(panel.background = element_rect(fill = "gray90"))

# Loop over each gene of interest
for (i in seq_along(genes_of_interest)) {
  gene <- genes_of_interest[i]

  # Only the first panel carries a y-axis label
  y_label <- if (i == 1) "Survival Probability" else ""

  # HIF1A is grouped by its own categorical levels rather than a median split
  if (gene == 'HIF1A') {
    # Put "N_A" first so the strata order matches legend.labs and palette below
    data$HIF1A <- factor(data$HIF1A, levels = c("N_A", "H_E"))

    # Fit survival curves grouped by the HIF1A level
    fit <- survfit(surv_object ~ HIF1A, data = data)

    # This is the only panel that keeps its legend
    g <- ggsurvplot(
      fit,
      data = data,
      pval = TRUE,
      conf.int = FALSE,  # No confidence interval shading
      risk.table = FALSE,  # No risk table
      legend.title = "Condition",
      legend.labs = c("N_A: Not Altered", "H_E: Highly Expressed"),
      ggtheme = panel_theme,
      title = NULL,  # No individual plot title
      # Colors follow the strata order: N_A gets "#00BFC4" and H_E gets
      # "#F8766D", the color used for "High Expression" in the other panels
      palette = c("#00BFC4", "#F8766D"),
      ylab = y_label
    ) +
      labs(tag = subplot_labels[i])  # Add the subplot label

    # Restore the original level order of HIF1A
    data$HIF1A <- factor(data$HIF1A, levels = c("H_E", "N_A"))

  } else {
    # Split samples at the median expression of this gene
    median_expression <- median(data[[gene]], na.rm = TRUE)

    # Store the grouping in a '<gene>_expression_group' column; values equal
    # to the median are labelled 'High Expression'
    group_col <- paste0(gene, '_expression_group')
    data[[group_col]] <- ifelse(data[[gene]] >= median_expression, 'High Expression', 'Low Expression')

    # Fit survival curves grouped by expression level
    fit <- survfit(surv_object ~ data[[group_col]], data = data)

    # The groups are character values, so the strata sort alphabetically
    # ("High Expression" before "Low Expression"), matching these labels
    legend_labels <- c("High Expression", "Low Expression")

    # Same layout as the HIF1A panel, but with the legend hidden
    g <- ggsurvplot(
      fit,
      data = data,
      pval = TRUE,
      conf.int = FALSE,  # No confidence interval shading
      risk.table = FALSE,  # No risk table
      legend.title = gene,
      legend.labs = legend_labels,
      palette = c("#F8766D", "#00BFC4"),  # High Expression, Low Expression
      ggtheme = panel_theme + theme(legend.position = "none"),  # Hide the legend
      title = NULL,  # No individual plot title
      ylab = y_label
    ) +
      labs(tag = subplot_labels[i])  # Add the subplot label
  }

  # Keep the full ggsurvplot object, keyed by gene name
  plots[[gene]] <- g
}

# Arrange the stored panels on a grid of 3 columns by 4 rows; print = TRUE
# also draws the arranged result
combined_plot <- arrange_ggsurvplots(
  plots,
  print = TRUE,
  nrow = 4,
  ncol = 3
)

# Write the arranged figure to disk (width 15, height 10 in ggsave's default units)
ggsave(
  'Combined_Kaplan_Meier_Plots_with_HIF1A_ordered.png',
  plot = combined_plot,
  height = 10,
  width = 15
)

# Report completion
print("All Kaplan-Meier survival analyses completed successfully and saved in a single plot with the correct legend setup.")
