In [2]:
# import libraries 
library(ggplot2)
library(tidyverse)
library(ggfortify)
library(ggrepel)
library(factoextra)
library(FactoMineR)

In [None]:
# Load the tab-separated shark immunity table; the first line of the file
# supplies the column names.
sharks <- read.table(
  file = "datasets/shark_immunity.txt",
  header = TRUE,
  sep = "\t"
)

# Store the physiological condition as a factor with an explicit level order,
# so the conditions are displayed in this order instead of alphabetically.
# Any value not listed in `levels` becomes NA.
sharks$physiological_condition <- factor(
  x = sharks$physiological_condition,
  levels = c("Healthy", "Stressed", "Injured", "Infected", "Recovering")
)

# Remove the first "_expression" occurrence from every column name, leaving
# just the gene identifier (the code below refers to Gene_A ... Gene_F).
colnames(sharks) <- sub(
  pattern = "_expression",
  replacement = "",
  x = colnames(sharks)
)

# Long-format version of `sharks`: every column whose name starts with "Gene_"
# is stacked, with the column name stored in `gene` and the cell value in
# `expression`. All other columns are repeated for each stacked row.
sharks_genes <- pivot_longer(
  sharks,
  cols = starts_with("Gene_"),
  names_to = "gene",
  values_to = "expression"
)

# Doubly long version of `sharks`: for every original row there is one output
# row per (non-gene measurement, gene) pair.
#   variable / value  - name and value of one of the eight non-gene columns
#   gene / expression - name and value of one of the six Gene_* columns
sharks_super_long <- sharks %>%
  # Keep only the grouping columns, the genes and the measurements to stack.
  select(
    physiological_condition, immune_cell_type,
    Gene_A, Gene_B, Gene_C, Gene_D, Gene_E, Gene_F,
    regeneration_capacity, age_years, immune_receptor_diversity,
    response_to_infection, weight_kg, inflammation_level,
    resilience_score, reproduction_rate
  ) %>%
  # First pass: stack everything except the grouping and gene columns.
  pivot_longer(
    cols = -c(
      "physiological_condition", "immune_cell_type",
      "Gene_A", "Gene_B", "Gene_C", "Gene_D", "Gene_E", "Gene_F"
    ),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # Second pass: stack the remaining (gene) columns.
  pivot_longer(
    cols = -c(
      "physiological_condition", "immune_cell_type",
      "variable", "value"
    ),
    names_to = "gene",
    values_to = "expression"
  )

In [3]:
# data selection functions
#' Select the rows recorded under a given physiological condition
#'
#' @param data A data frame with a `physiological_condition` column.
#' @param condition Condition label to keep. A vector of labels keeps the rows
#'   matching any of them.
#' @return The matching rows of `data`, always as a data frame. Rows whose
#'   condition is `NA` are not returned unless `NA` itself is requested.
condition_data <- function(data, condition) {
  # `%in%` never yields NA, so rows with a missing condition cannot slip
  # through as all-NA rows, which is what an `==` mask would produce.
  keep <- data$physiological_condition %in% condition
  # drop = FALSE keeps the result a data frame even for a one-column input.
  data[keep, , drop = FALSE]
}

#' Select the rows belonging to a given immune cell type
#'
#' @param data A data frame with an `immune_cell_type` column.
#' @param cell_type Cell type label to keep. A vector of labels keeps the rows
#'   matching any of them.
#' @return The matching rows of `data`, always as a data frame. Rows whose
#'   cell type is `NA` are not returned unless `NA` itself is requested.
cell_data <- function(data, cell_type) {
  # `%in%` never yields NA, so rows with a missing cell type cannot slip
  # through as all-NA rows, which is what an `==` mask would produce.
  keep <- data$immune_cell_type %in% cell_type
  # drop = FALSE keeps the result a data frame even for a one-column input.
  data[keep, , drop = FALSE]
}