# Load libraries and data

In [None]:
library(tidyverse)
library(phyloseq)
library(microbiome)
library(rstatix)
library(vegan)
library(ggrepel)
library(gridExtra)
library(rlang)
library(viridis)
library(ggpubr)

options(repr.plot.width=12, repr.plot.height=10)

In [None]:
# Cleaned nasal samples, "comp" variant
# (presumably compositional / relative abundances - confirm with the upstream notebook)
nasal_phylo_comp <- readRDS("../../results/Microbiome_analysis/nasal_samples_clean_comp.rds")
nasal_phylo_comp

In [None]:
# Cleaned nasal samples, "clr" variant
# (presumably centered log-ratio transformed - confirm with the upstream notebook)
nasal_phylo_clr <- readRDS("../../results/Microbiome_analysis/nasal_samples_clean_clr.rds")
nasal_phylo_clr

In [None]:
# Cleaned gut samples, "comp" variant
# (presumably compositional / relative abundances - confirm with the upstream notebook)
gut_phylo_comp <- readRDS("../../results/Microbiome_analysis/gut_samples_clean_comp.rds")
gut_phylo_comp

In [None]:
# Cleaned gut samples, "clr" variant
# (presumably centered log-ratio transformed - confirm with the upstream notebook)
gut_phylo_clr <- readRDS("../../results/Microbiome_analysis/gut_samples_clean_clr.rds")
gut_phylo_clr

## Functions

In [None]:
#' Subset a phyloseq object by treatment, timepoint and genus
#'
#' Keeps the samples whose `treatment` and `timepoint` metadata match the
#' requested values, then keeps only patients that have a sample at every
#' requested timepoint, and finally keeps only taxa whose `Genus` is listed
#' in `genera_of_interest`.
#'
#' @param phylo_obj A phyloseq object whose sample data has `treatment`,
#'   `timepoint` and `patient` columns and whose taxonomy table has `Genus`.
#' @param treatments Treatment labels to keep.
#' @param genera_of_interest Genus names to keep.
#' @param timepoints Timepoint labels every retained patient must cover.
#' @return The filtered phyloseq object. Stops with an error when no patient
#'   has samples at all requested timepoints.
subset_treatment_genera_timepoints <- function(phylo_obj, treatments, genera_of_interest, timepoints) {
  # A duplicated entry must not raise the number of distinct timepoints a
  # patient has to cover, otherwise every patient would be rejected.
  timepoints <- unique(timepoints)

  # Samples matching the requested treatments and timepoints
  meta <- data.frame(sample_data(phylo_obj))
  samples_to_keep <- rownames(meta)[
    meta$treatment %in% treatments & meta$timepoint %in% timepoints
  ]
  phylo_filtered <- prune_samples(samples_to_keep, phylo_obj)

  # Patients observed at every requested timepoint
  meta_filtered <- data.frame(sample_data(phylo_filtered))
  complete_patients <- meta_filtered %>%
    group_by(patient) %>%
    summarize(timepoint_count = n_distinct(timepoint)) %>%
    filter(timepoint_count == length(timepoints)) %>%
    pull(patient)

  if (length(complete_patients) == 0) {
    stop("No patients found with data at all specified timepoints")
  }

  samples_to_keep_final <- rownames(meta_filtered)[
    meta_filtered$patient %in% complete_patients
  ]
  phylo_filtered <- prune_samples(samples_to_keep_final, phylo_filtered)

  # Keep only the genera of interest
  taxonomy <- as.data.frame(tax_table(phylo_filtered))
  taxa_to_keep <- rownames(taxonomy)[taxonomy$Genus %in% genera_of_interest]
  phylo_filtered <- prune_taxa(taxa_to_keep, phylo_filtered)

  return(phylo_filtered)
}


In [None]:
#' Aggregate a phyloseq object at a taxonomic level for composition plots
#'
#' Thin wrapper around `aggregate_rare()` with a detection threshold of 0.05
#' and a prevalence threshold of 0.10.
#'
#' @param phloseq_object A phyloseq object.
#' @param level Taxonomic rank to aggregate at (e.g. "Genus").
#' @return The aggregated phyloseq object returned by `aggregate_rare()`.
prepare_longi <- function(phloseq_object, level) {
  aggregate_rare(
    phloseq_object,
    level = level,
    detection = 5 / 100,
    prevalence = 10 / 100
  )
}

In [None]:
#' Composition plot averaged over a sample-data variable
#'
#' @param aggregated_phylo Aggregated phyloseq object passed to
#'   `plot_composition()`.
#' @param grouping Name of the variable to average by; also used as the
#'   x-axis label.
#' @param title Plot title.
#' @return A ggplot object.
plot_longi <- function(aggregated_phylo, grouping, title) {
  # Display size used by the notebook when the figure is rendered
  options(repr.plot.width = 15, repr.plot.height = 10)

  font_sizes <- theme(
    text = element_text(size = 15),
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.text = element_text(size = 18),
    axis.title = element_text(size = 20),
    plot.title = element_text(size = 24),
    legend.title = element_text(size = 20),
    legend.text = element_text(size = 18)
  )

  composition <- plot_composition(aggregated_phylo, average_by = grouping)

  composition +
    xlab(grouping) +
    ggtitle(title) +
    theme_classic() +
    scale_fill_viridis_d(option = "H") +
    font_sizes
}

In [None]:
#' Per-genus abundance boxplots across timepoints with Wilcoxon brackets
#'
#' Wilcoxon tests are computed on `phyloseq_clr`; the figure and the reported
#' mean abundances use `phyloseq_comp` multiplied by 100. P-values are
#' BH-adjusted within each genus across the requested comparisons.
#'
#' @param phyloseq_clr Phyloseq object used for the statistical tests.
#' @param phyloseq_comp Phyloseq object used for plotting and mean abundances.
#' @param comparisons List of length-2 character vectors naming the two
#'   timepoints to compare.
#' @param paired If TRUE, a paired test is run on patients present at both
#'   timepoints, matched by `patient`.
#' @param title Plot title.
#' @param color_palette Currently unused; kept so existing calls keep working.
#' @return A list with `plot` (ggplot) and `significant_changes` (one row per
#'   significant comparison with the mean abundance of both groups).
plot_abundance <- function(phyloseq_clr, phyloseq_comp, comparisons, paired = FALSE, title = "", color_palette = NULL) {
  # Display size used by the notebook when the figure is rendered
  options(repr.plot.width = 25, repr.plot.height = 10)

  # Order of the groups on the x axis
  timepoint_order <- c("healthy day 0", "CRSwNP control day 0", "0", "28", "90", "180")

  # Melt a phyloseq object and sum abundances per genus, patient and timepoint
  prepare_data <- function(phyloseq) {
    psmelt(phyloseq) %>%
      group_by(Genus, patient, timepoint) %>%
      summarise(Abundance = sum(Abundance), .groups = "drop") %>%
      mutate(timepoint = factor(timepoint, levels = timepoint_order))
  }

  # Significance stars; missing p-values are labelled "ns"
  signif_label <- function(p_adj) {
    case_when(
      p_adj < 0.001 ~ "***",
      p_adj < 0.01 ~ "**",
      p_adj < 0.05 ~ "*",
      TRUE ~ "ns"
    )
  }

  # Wilcoxon test of one genus for one pair of timepoints
  run_comparison <- function(genus_data, comp) {
    first <- genus_data[genus_data$timepoint %in% comp[1], ]
    second <- genus_data[genus_data$timepoint %in% comp[2], ]

    if (paired) {
      # Pair observations explicitly by patient instead of relying on row order
      first_patients <- as.character(first$patient)
      second_patients <- as.character(second$patient)
      shared <- intersect(first_patients, second_patients)
      x <- first$Abundance[match(shared, first_patients)]
      y <- second$Abundance[match(shared, second_patients)]
    } else {
      x <- first$Abundance
      y <- second$Abundance
    }

    if (length(x) == 0 || length(y) == 0) return(NULL)  # nothing to compare

    # The x/y interface is used because recent R versions (4.4+, per the
    # release notes) reject `paired` in the formula interface, which made
    # every test fail silently.
    test_result <- tryCatch(
      suppressWarnings(wilcox.test(x, y, paired = paired)),
      error = function(e) {
        warning(
          "Wilcoxon test skipped for ", unique(genus_data$Genus)[1],
          " (", comp[1], " vs ", comp[2], "): ", conditionMessage(e),
          call. = FALSE
        )
        NULL
      }
    )
    if (is.null(test_result)) return(NULL)

    data.frame(
      .y. = "Abundance",
      group1 = comp[1],
      group2 = comp[2],
      p = test_result$p.value,
      method = "Wilcoxon",
      Genus = unique(genus_data$Genus),
      stringsAsFactors = FALSE
    )
  }

  # All comparisons for all genera; returns NULL when no test could be run
  perform_statistical_tests <- function(plot_data) {
    per_genus <- lapply(split(plot_data, plot_data$Genus), function(genus_data) {
      tests <- do.call(rbind, lapply(comparisons, function(comp) {
        run_comparison(genus_data, comp)
      }))
      if (is.null(tests) || nrow(tests) == 0) return(NULL)

      tests$p.adj <- p.adjust(tests$p, method = "BH")
      tests$p.adj.signif <- signif_label(tests$p.adj)
      tests
    })
    do.call(rbind, unname(per_genus))
  }

  create_plot <- function(df_comp, stat_test) {
    p <- ggplot(df_comp, aes(x = timepoint, y = Abundance, fill = Genus, color = Genus)) +
      geom_boxplot(alpha = 0.7, outlier.shape = NA, color = "black") +
      geom_jitter(height = 0, width = 0.3) +
      labs(x = "", y = "Abundance (%)\n", title = title) +
      scale_fill_viridis_d(option = "H") +
      scale_color_viridis_d(option = "H") +
      theme_classic() +
      facet_wrap(~Genus, scales = "free_y") +
      theme(legend.position = "none",
            axis.text = element_text(size = 18),
            axis.title = element_text(size = 20),
            strip.text = element_text(size = 20),
            plot.title = element_text(size = 24),
            axis.text.x = element_text(angle = 45, hjust = 1))

    if (!is.null(stat_test) && nrow(stat_test) > 0) {
      # Only significant comparisons get a bracket
      significant_stats <- stat_test %>% filter(p.adj.signif != "ns")

      for (genus in unique(significant_stats$Genus)) {
        genus_stats <- significant_stats %>% filter(Genus == genus)
        if (nrow(genus_stats) > 0) {
          # Brackets start 2% above the tallest point of this genus' facet
          max_abundance <- max(df_comp$Abundance[df_comp$Genus == genus])
          start_y_position <- max_abundance * 1.02

          p <- p + stat_pvalue_manual(
            genus_stats,
            label = "p.adj.signif",
            tip.length = 0.01,
            step.increase = 0.05,
            y.position = seq(start_y_position, by = max_abundance * 0.05, length.out = nrow(genus_stats)),
            facet.var = "Genus"
          )
        }
      }
    }
    p
  }

  df_clr <- prepare_data(phyloseq_clr)
  df_comp <- prepare_data(phyloseq_comp) %>%
    mutate(Abundance = Abundance * 100) # convert to percentages

  stat_test <- perform_statistical_tests(df_clr)
  plot <- create_plot(df_comp, stat_test)

  # Mean percentage per genus and timepoint; the key is converted to character
  # so it joins cleanly against the character group1/group2 columns.
  mean_by_timepoint <- df_comp %>%
    group_by(Genus, timepoint) %>%
    summarise(mean_abundance = mean(Abundance), .groups = "drop") %>%
    mutate(timepoint = as.character(timepoint))

  if (is.null(stat_test)) {
    # No test could be run: return an empty table with the usual columns
    significant_changes <- data.frame(
      Genus = character(0),
      group1 = character(0),
      group2 = character(0),
      group1_mean_abundance = numeric(0),
      group2_mean_abundance = numeric(0),
      mean_abundace_change = numeric(0),
      p.adj = numeric(0),
      stringsAsFactors = FALSE
    )
  } else {
    significant_changes <- stat_test %>%
      filter(p.adj.signif != "ns") %>%
      left_join(mean_by_timepoint,
                by = c("Genus" = "Genus",
                       "group1" = "timepoint")) %>%
      rename(group1_mean_abundance = mean_abundance) %>%
      left_join(mean_by_timepoint,
                by = c("Genus" = "Genus",
                       "group2" = "timepoint")) %>%
      rename(group2_mean_abundance = mean_abundance) %>%
      # Column name (including its spelling) is kept for downstream consumers
      mutate(mean_abundace_change = group1_mean_abundance - group2_mean_abundance) %>%
      select(Genus, group1, group2, group1_mean_abundance, group2_mean_abundance, mean_abundace_change, p.adj)
  }

  return(list(plot = plot, significant_changes = significant_changes))
}


In [None]:
#' Per-patient genus abundances at a single timepoint
#'
#' @param phylo_object A phyloseq object; melted with `psmelt()`.
#' @param selected_timepoint Value of `timepoint` to keep.
#' @return A tibble with one row per Genus, treatment and patient and the
#'   summed `Abundance`.
prepare_df_longitudinal_treatment_comparison <- function(phylo_object, selected_timepoint){
    melted <- psmelt(phylo_object)
    at_timepoint <- subset(melted, timepoint == selected_timepoint)

    summarise(
        group_by(at_timepoint, Genus, treatment, patient),
        Abundance = sum(Abundance),
        .groups = "drop"
    )
}

In [None]:
#' Compare per-genus abundance between two treatment groups
#'
#' An unpaired Wilcoxon test is run per genus on `phyloseq_clr` between the
#' first two entries of `treatment_groups`; p-values are BH-adjusted across
#' genera. The figure and the reported means use `phyloseq_comp` multiplied
#' by 100.
#'
#' @param phyloseq_clr Phyloseq object used for the statistical tests.
#' @param phyloseq_comp Phyloseq object used for plotting and mean abundances.
#' @param treatment_groups Treatment labels to keep; the first two are tested
#'   against each other.
#' @param title Plot title.
#' @return A list with `plot` (ggplot) and `significant_changes`.
longitudinal_treatment_comparison_plotting <- function(phyloseq_clr, phyloseq_comp, treatment_groups, title) {
  if (length(treatment_groups) < 2) {
    stop("`treatment_groups` must contain at least two treatment labels", call. = FALSE)
  }

  # Display size used by the notebook when the figure is rendered
  options(repr.plot.width = 20, repr.plot.height = 10)

  group1 <- treatment_groups[1]
  group2 <- treatment_groups[2]

  # Melt, sum abundances per genus/patient/treatment, keep requested groups
  prepare_data <- function(phyloseq) {
    psmelt(phyloseq) %>%
      group_by(Genus, patient, treatment) %>%
      summarise(Abundance = sum(Abundance), .groups = "drop") %>%
      filter(treatment %in% treatment_groups)
  }

  # A genus lacking observations in one group yields NA instead of aborting
  # the analysis of every other genus.
  safe_wilcox_p <- function(x, y, genus) {
    tryCatch(
      wilcox.test(x, y, paired = FALSE)$p.value,
      error = function(e) {
        warning(
          "Wilcoxon test skipped for ", genus, ": ", conditionMessage(e),
          call. = FALSE
        )
        NA_real_
      }
    )
  }

  calculate_statistical_test <- function(df_clr) {
    df_clr %>%
      group_by(Genus) %>%
      summarise(
        p_value = safe_wilcox_p(
          Abundance[treatment == group1],
          Abundance[treatment == group2],
          Genus[1]
        ),
        .groups = "drop"
      ) %>%
      mutate(
        p.adj = p.adjust(p_value, method = "BH"),
        p.adj.signif = case_when(
          p.adj < 0.001 ~ "***",
          p.adj < 0.01 ~ "**",
          p.adj < 0.05 ~ "*",
          TRUE ~ "ns"
        ),
        group1 = group1,
        group2 = group2
      )
  }

  create_plot <- function(df_comp, stat_test) {
    p <- ggplot(df_comp, aes(x = treatment, y = Abundance, fill = Genus, color = Genus)) +
      geom_boxplot(alpha = 0.7, outlier.shape = NA, color = "black") +
      geom_jitter(height = 0, width = 0.3) +
      labs(x = "", y = "Abundance (%)\n", title = title) +
      scale_fill_viridis_d(option = "H") +
      scale_color_viridis_d(option = "H") +
      theme_classic() +
      facet_wrap(~Genus, scales = "free_y") +
      theme(legend.position = "none",
            axis.text = element_text(size = 18),
            axis.title = element_text(size = 20),
            strip.text = element_text(size = 18),
            plot.title = element_text(size = 20))

    if (!is.null(stat_test) && nrow(stat_test) > 0) {
      # Only significant comparisons get a bracket
      significant_stats <- stat_test %>% filter(p.adj.signif != "ns")

      for (genus in unique(significant_stats$Genus)) {
        genus_stats <- significant_stats %>% filter(Genus == genus)
        if (nrow(genus_stats) > 0) {
          # Bracket sits 2% above the tallest point of this genus' facet
          max_abundance <- max(df_comp$Abundance[df_comp$Genus == genus])
          start_y_position <- max_abundance * 1.02
          p <- p + stat_pvalue_manual(
            genus_stats,
            label = "p.adj.signif",
            y.position = start_y_position,
            tip.length = 0.01,
            facet.var = "Genus"
          )
        }
      }
    }
    p
  }

  # The rank-based test does not need rescaling, so only the plotted data are
  # converted to percentages.
  df_clr <- prepare_data(phyloseq_clr)
  df_comp <- prepare_data(phyloseq_comp) %>%
    mutate(Abundance = Abundance * 100)

  wilcoxon_results <- calculate_statistical_test(df_clr)
  plot <- create_plot(df_comp, wilcoxon_results)

  # Mean percentage per genus and treatment; the key is converted to character
  # so it joins cleanly against the character group1/group2 columns.
  mean_by_treatment <- df_comp %>%
    group_by(Genus, treatment) %>%
    summarise(mean_abundance = mean(Abundance), .groups = "drop") %>%
    mutate(treatment = as.character(treatment))

  significant_changes <- wilcoxon_results %>%
    filter(p.adj.signif != "ns") %>%
    left_join(mean_by_treatment,
              by = c("Genus" = "Genus",
                     "group1" = "treatment")) %>%
    rename(group1_mean_abundance = mean_abundance) %>%
    left_join(mean_by_treatment,
              by = c("Genus" = "Genus",
                     "group2" = "treatment")) %>%
    rename(group2_mean_abundance = mean_abundance) %>%
    # Column name (including its spelling) is kept for downstream consumers
    mutate(mean_abundace_change = group1_mean_abundance - group2_mean_abundance) %>%
    select(Genus, group1, group2, group1_mean_abundance, group2_mean_abundance, mean_abundace_change, p.adj)

  return(list(plot = plot, significant_changes = significant_changes))
}

In [None]:
#' Subset a phyloseq object by treatment and timepoint (all taxa kept)
#'
#' Keeps the samples whose `treatment` and `timepoint` metadata match the
#' requested values, then keeps only patients that have a sample at every
#' requested timepoint.
#'
#' @param phylo_obj A phyloseq object whose sample data has `treatment`,
#'   `timepoint` and `patient` columns.
#' @param treatments Treatment labels to keep.
#' @param genera_of_interest Ignored. Kept in the signature so existing calls
#'   keep working; it is deliberately never evaluated.
#' @param timepoints Timepoint labels every retained patient must cover.
#' @return The filtered phyloseq object. Stops with an error when no patient
#'   has samples at all requested timepoints.
subset_treatment_timepoints <- function(phylo_obj, treatments, genera_of_interest = NULL, timepoints) {
    # A duplicated entry must not raise the number of distinct timepoints a
    # patient has to cover, otherwise every patient would be rejected.
    timepoints <- unique(timepoints)

    # Samples matching the requested treatments and timepoints
    meta <- data.frame(sample_data(phylo_obj))
    samples_to_keep <- rownames(meta)[
        meta$treatment %in% treatments &
        meta$timepoint %in% timepoints
    ]
    phylo_filtered <- prune_samples(samples_to_keep, phylo_obj)

    # Patients observed at every requested timepoint
    meta_filtered <- data.frame(sample_data(phylo_filtered))
    complete_patients <- meta_filtered %>%
        group_by(patient) %>%
        summarize(timepoint_count = n_distinct(timepoint)) %>%
        filter(timepoint_count == length(timepoints)) %>%
        pull(patient)

    if (length(complete_patients) == 0) {
        stop("No patients found with data at all specified timepoints")
    }

    samples_to_keep_final <- rownames(meta_filtered)[
        meta_filtered$patient %in% complete_patients
    ]
    phylo_filtered <- prune_samples(samples_to_keep_final, phylo_filtered)

    return(phylo_filtered)
}

# All genera

## Dupilumab treatment at different timepoints

### Nasal passage samples

In [None]:
# Dupilumab-treated patients with a sample at every follow-up timepoint.
# `subset_treatment_timepoints()` does not use `genera_of_interest`, so NULL
# is passed rather than a global that is not defined at this point.
nasal_phylo_comp_dupilumab <- subset_treatment_timepoints(
  phylo_obj = nasal_phylo_comp,
  treatments = "Dupilumab_treatment",
  genera_of_interest = NULL,
  timepoints = c("0", "28", "90", "180")
)

# Healthy controls at day 0 (argument spelled out in full: `treatments`)
nasal_phylo_comp_healthy <- subset_treatment_timepoints(
  phylo_obj = nasal_phylo_comp,
  treatments = "healthy_control",
  genera_of_interest = NULL,
  timepoints = "0"
)

# Relabel healthy controls so they form their own group
sample_data(nasal_phylo_comp_healthy)$timepoint <- "healthy day 0"

# Merge healthy controls with the dupilumab subset
nasal_phylo_comp_modified <- merge_phyloseq(
  nasal_phylo_comp_dupilumab,
  nasal_phylo_comp_healthy
)

# Fix the display order of the groups
sample_data(nasal_phylo_comp_modified)$timepoint <- factor(
  sample_data(nasal_phylo_comp_modified)$timepoint,
  levels = c("healthy day 0", "0", "28", "90", "180")
)

nasal_phylo_comp_modified

In [None]:
# Genus-level aggregation of the merged nasal data
nasal_dupilumab_longi <- prepare_longi(nasal_phylo_comp_modified, "Genus")

# Composition averaged per timepoint (title typo "passsage" corrected)
nasal_dupilumab_longi_plot <- plot_longi(
  nasal_dupilumab_longi,
  "timepoint",
  "Nasal passage microbiome of Dupilumab treated patients"
)

nasal_dupilumab_longi_plot

In [None]:
# Convert to long format to see the detailed breakdown per timepoint and taxon
ps_df <- psmelt(nasal_dupilumab_longi)

ps_df %>%
    group_by(timepoint, OTU) %>%
    summarise(mean_abundance = mean(Abundance), .groups = "drop")


### Gut samples

In [None]:
# Dupilumab-treated patients with a sample at every follow-up timepoint.
# `subset_treatment_timepoints()` does not use `genera_of_interest`, so NULL
# is passed rather than a global that is not defined at this point.
gut_phylo_comp_dupilumab <- subset_treatment_timepoints(
  phylo_obj = gut_phylo_comp,
  treatments = "Dupilumab_treatment",
  genera_of_interest = NULL,
  timepoints = c("0", "28", "90", "180")
)

# Healthy controls at day 0 (argument spelled out in full: `treatments`)
gut_phylo_comp_healthy <- subset_treatment_timepoints(
  phylo_obj = gut_phylo_comp,
  treatments = "healthy_control",
  genera_of_interest = NULL,
  timepoints = "0"
)

# Relabel healthy controls so they form their own group
sample_data(gut_phylo_comp_healthy)$timepoint <- "healthy day 0"

# Merge healthy controls with the dupilumab subset
gut_phylo_comp_modified <- merge_phyloseq(
  gut_phylo_comp_dupilumab,
  gut_phylo_comp_healthy
)

# Fix the display order of the groups
sample_data(gut_phylo_comp_modified)$timepoint <- factor(
  sample_data(gut_phylo_comp_modified)$timepoint,
  levels = c("healthy day 0", "0", "28", "90", "180")
)

gut_phylo_comp_modified

In [None]:
# Genus-level aggregation of the merged gut data
gut_dupilumab_longi <- prepare_longi(
  phloseq_object = gut_phylo_comp_modified,
  level = "Genus"
)

# Composition averaged per timepoint
gut_dupilumab_longi_plot <- plot_longi(
  aggregated_phylo = gut_dupilumab_longi,
  grouping = "timepoint",
  title = "gut microbiome of Dupilumab treated patients"
)

gut_dupilumab_longi_plot

In [None]:
# Convert to long format to see the detailed breakdown per timepoint and taxon
ps_df <- psmelt(gut_dupilumab_longi)

ps_df %>%
    group_by(timepoint, OTU) %>%
    summarise(mean_abundance = mean(Abundance), .groups = "drop")

# Selected genera

In [None]:
# Genera examined in the nasal passage samples
genera_of_interest_nasal <- c(
  "Staphylococcus",
  "Dolosigranulum",
  "Corynebacterium",
  "Lactobacillus",
  "Lawsonella",
  "Cutibacterium"
)

# Genera examined in the gut samples
genera_of_interest_gut <- c(
  "Bifidobacterium",
  "Collinsella",
  "Bacteroides",
  "Ruminococcus",
  "Prevotella",
  "Faecalibacterium"
)

## Dupilumab samples at different timepoints

In [None]:
# Every pair of follow-up timepoints, earlier timepoint first
comparisons1 <- combn(c("0", "28", "90", "180"), 2, simplify = FALSE)

### Nasal passage samples

In [None]:
# Dupilumab patients with all four timepoints, nasal genera of interest ("clr" data)
nasal_phylo_clr_dupilumab <- subset_treatment_genera_timepoints(
  phylo_obj = nasal_phylo_clr,
  treatments = "Dupilumab_treatment",
  timepoints = c("0", "28", "90", "180"),
  genera_of_interest = genera_of_interest_nasal
)

nasal_phylo_clr_dupilumab

In [None]:
# Dupilumab patients with all four timepoints, nasal genera of interest ("comp" data)
nasal_phylo_comp_dupilumab <- subset_treatment_genera_timepoints(
  phylo_obj = nasal_phylo_comp,
  treatments = "Dupilumab_treatment",
  timepoints = c("0", "28", "90", "180"),
  genera_of_interest = genera_of_interest_nasal
)

nasal_phylo_comp_dupilumab

In [None]:
# Paired comparisons between all follow-up timepoints, nasal samples
nasal_dupilumab_longi_selected <- plot_abundance(
  phyloseq_clr = nasal_phylo_clr_dupilumab,
  phyloseq_comp = nasal_phylo_comp_dupilumab,
  comparisons = comparisons1,
  paired = TRUE,
  title = "Nasal passage microbiota of Dupilumab treated patients"
)

nasal_dupilumab_longi_selected_plot <- nasal_dupilumab_longi_selected[["plot"]]
nasal_dupilumab_longi_selected_plot

In [None]:
nasal_dupilumab_longi_selected$significant_changes

### Gut samples

In [None]:
# Dupilumab patients with all four timepoints, gut genera of interest ("clr" data)
gut_phylo_clr_dupilumab <- subset_treatment_genera_timepoints(
  phylo_obj = gut_phylo_clr,
  treatments = "Dupilumab_treatment",
  timepoints = c("0", "28", "90", "180"),
  genera_of_interest = genera_of_interest_gut
)

gut_phylo_clr_dupilumab

In [None]:
# Dupilumab patients with all four timepoints, gut genera of interest ("comp" data)
gut_phylo_comp_dupilumab <- subset_treatment_genera_timepoints(
  phylo_obj = gut_phylo_comp,
  treatments = "Dupilumab_treatment",
  timepoints = c("0", "28", "90", "180"),
  genera_of_interest = genera_of_interest_gut
)

gut_phylo_comp_dupilumab

In [None]:
# Paired comparisons between all follow-up timepoints, gut samples
gut_dupilumab_longi_selected <- plot_abundance(
  phyloseq_clr = gut_phylo_clr_dupilumab,
  phyloseq_comp = gut_phylo_comp_dupilumab,
  comparisons = comparisons1,
  paired = TRUE,
  title = "Gut microbiota of Dupilumab treated patients"
)

gut_dupilumab_longi_selected_plot <- gut_dupilumab_longi_selected[["plot"]]
gut_dupilumab_longi_selected_plot

In [None]:
gut_dupilumab_longi_selected$significant_changes

## Healthy controls on day 0 vs Dupilumab all timepoints

In [None]:
# Healthy controls at day 0 against every other group
comparisons3 <- lapply(
  c("CRSwNP control day 0", "0", "28", "90", "180"),
  function(other_group) c("healthy day 0", other_group)
)

### Nasal passage samples

In [None]:
# Healthy controls at day 0, relabelled so they form their own group
nasal_phylo_clr_healthy <- subset_treatment_genera_timepoints(
  phylo_obj = nasal_phylo_clr,
  treatments = "healthy_control",
  timepoints = "0",
  genera_of_interest = genera_of_interest_nasal
)
sample_data(nasal_phylo_clr_healthy)$timepoint <- "healthy day 0"

# CRS controls at day 0, relabelled the same way
nasal_phylo_clr_CRS <- subset_treatment_genera_timepoints(
  phylo_obj = nasal_phylo_clr,
  treatments = "CRS_control",
  timepoints = "0",
  genera_of_interest = genera_of_interest_nasal
)
sample_data(nasal_phylo_clr_CRS)$timepoint <- "CRSwNP control day 0"

# Combine both control groups with the dupilumab subset
nasal_phylo_clr_modified <- merge_phyloseq(
  nasal_phylo_clr_dupilumab,
  nasal_phylo_clr_healthy,
  nasal_phylo_clr_CRS
)

# Fix the display order of the groups
sample_data(nasal_phylo_clr_modified)$timepoint <- factor(
  sample_data(nasal_phylo_clr_modified)$timepoint,
  levels = c("healthy day 0", "CRSwNP control day 0", "0", "28", "90", "180")
)

nasal_phylo_clr_modified


In [None]:
# Healthy controls at day 0, relabelled so they form their own group
nasal_phylo_comp_healthy <- subset_treatment_genera_timepoints(
  phylo_obj = nasal_phylo_comp,
  treatments = "healthy_control",
  timepoints = "0",
  genera_of_interest = genera_of_interest_nasal
)
sample_data(nasal_phylo_comp_healthy)$timepoint <- "healthy day 0"

# CRS controls at day 0, relabelled the same way
nasal_phylo_comp_CRS <- subset_treatment_genera_timepoints(
  phylo_obj = nasal_phylo_comp,
  treatments = "CRS_control",
  timepoints = "0",
  genera_of_interest = genera_of_interest_nasal
)
sample_data(nasal_phylo_comp_CRS)$timepoint <- "CRSwNP control day 0"

# Combine both control groups with the dupilumab subset
nasal_phylo_comp_modified <- merge_phyloseq(
  nasal_phylo_comp_dupilumab,
  nasal_phylo_comp_healthy,
  nasal_phylo_comp_CRS
)

# Fix the display order of the groups
sample_data(nasal_phylo_comp_modified)$timepoint <- factor(
  sample_data(nasal_phylo_comp_modified)$timepoint,
  levels = c("healthy day 0", "CRSwNP control day 0", "0", "28", "90", "180")
)

nasal_phylo_comp_modified

In [None]:
# Unpaired comparisons of healthy controls against every other group, nasal samples
nasal_dupilumab_healthy_longi_selected <- plot_abundance(
  phyloseq_clr = nasal_phylo_clr_modified,
  phyloseq_comp = nasal_phylo_comp_modified,
  comparisons = comparisons3,
  paired = FALSE,
  title = "Nasal passage microbiota treated by Dupilumab compared to healthy controls on day 0"
)

nasal_dupilumab_healthy_longi_selected_plot <- nasal_dupilumab_healthy_longi_selected[["plot"]]
nasal_dupilumab_healthy_longi_selected_plot

In [None]:
nasal_dupilumab_healthy_longi_selected$significant_changes

### Gut samples

In [None]:
# Healthy controls at day 0, relabelled so they form their own group
gut_phylo_clr_healthy <- subset_treatment_genera_timepoints(
  phylo_obj = gut_phylo_clr,
  treatments = "healthy_control",
  timepoints = "0",
  genera_of_interest = genera_of_interest_gut
)
sample_data(gut_phylo_clr_healthy)$timepoint <- "healthy day 0"

# CRS controls at day 0, relabelled the same way
gut_phylo_clr_CRS <- subset_treatment_genera_timepoints(
  phylo_obj = gut_phylo_clr,
  treatments = "CRS_control",
  timepoints = "0",
  genera_of_interest = genera_of_interest_gut
)
sample_data(gut_phylo_clr_CRS)$timepoint <- "CRSwNP control day 0"

# Combine both control groups with the dupilumab subset
gut_phylo_clr_modified <- merge_phyloseq(
  gut_phylo_clr_dupilumab,
  gut_phylo_clr_healthy,
  gut_phylo_clr_CRS
)

# Fix the display order of the groups
sample_data(gut_phylo_clr_modified)$timepoint <- factor(
  sample_data(gut_phylo_clr_modified)$timepoint,
  levels = c("healthy day 0", "CRSwNP control day 0", "0", "28", "90", "180")
)

gut_phylo_clr_modified

In [None]:
# Healthy controls at day 0, relabelled so they form their own group
gut_phylo_comp_healthy <- subset_treatment_genera_timepoints(
  phylo_obj = gut_phylo_comp,
  treatments = "healthy_control",
  timepoints = "0",
  genera_of_interest = genera_of_interest_gut
)
sample_data(gut_phylo_comp_healthy)$timepoint <- "healthy day 0"

# CRS controls at day 0, relabelled the same way
gut_phylo_comp_CRS <- subset_treatment_genera_timepoints(
  phylo_obj = gut_phylo_comp,
  treatments = "CRS_control",
  timepoints = "0",
  genera_of_interest = genera_of_interest_gut
)
sample_data(gut_phylo_comp_CRS)$timepoint <- "CRSwNP control day 0"

# Combine both control groups with the dupilumab subset
gut_phylo_comp_modified <- merge_phyloseq(
  gut_phylo_comp_dupilumab,
  gut_phylo_comp_healthy,
  gut_phylo_comp_CRS
)

# Fix the display order of the groups
sample_data(gut_phylo_comp_modified)$timepoint <- factor(
  sample_data(gut_phylo_comp_modified)$timepoint,
  levels = c("healthy day 0", "CRSwNP control day 0", "0", "28", "90", "180")
)

gut_phylo_comp_modified

In [None]:
# Unpaired comparisons of healthy controls against every other group, gut samples
gut_dupilumab_healthy_longi_selected <- plot_abundance(
  phyloseq_clr = gut_phylo_clr_modified,
  phyloseq_comp = gut_phylo_comp_modified,
  comparisons = comparisons3,
  paired = FALSE,
  title = "Gut microbiota treated by Dupilumab compared to healthy controls on day 0"
)

gut_dupilumab_healthy_longi_selected_plot <- gut_dupilumab_healthy_longi_selected[["plot"]]
gut_dupilumab_healthy_longi_selected_plot

In [None]:
gut_dupilumab_healthy_longi_selected$significant_changes

# Saving plots

In [None]:
# Directory that receives the exported figures
save_path <- "../../results/Microbiome_analysis/plots/differential_abundance"

# Create the directory if it doesn't exist
if (!dir.exists(save_path)) {
  dir.create(save_path, recursive = TRUE)
}

# Plot dimensions (inches) and PNG resolution
plot_width <- 25
plot_height <- 10
plot_dpi <- 300

# Every variable whose name ends with "_plot" is exported
plot_vars <- ls(pattern = "_plot$")

# Save the object named `plot_name` as both PNG and PDF in `save_path`.
# Returns the two file paths invisibly.
save_plots <- function(plot_name) {
  plot_obj <- get(plot_name)

  png_filename <- file.path(save_path, paste0(plot_name, ".png"))
  pdf_filename <- file.path(save_path, paste0(plot_name, ".pdf"))

  if (inherits(plot_obj, "ggplot")) {
    ggsave(png_filename, plot = plot_obj, width = plot_width, height = plot_height, dpi = plot_dpi, units = "in")
    ggsave(pdf_filename, plot = plot_obj, width = plot_width, height = plot_height, units = "in")
  } else {
    # Anything else is printed onto an explicitly opened device; on.exit()
    # closes the device even when print() fails, so no device is leaked.
    print_to_device <- function(open_device) {
      open_device()
      on.exit(dev.off(), add = TRUE)
      print(plot_obj)
    }

    print_to_device(function() {
      png(png_filename, width = plot_width, height = plot_height, units = "in", res = plot_dpi)
    })
    print_to_device(function() {
      pdf(pdf_filename, width = plot_width, height = plot_height)
    })
  }

  cat("Saved:", png_filename, "\n")
  cat("Saved:", pdf_filename, "\n")

  invisible(c(png_filename, pdf_filename))
}

# Save each plot (called for its side effects only)
invisible(lapply(plot_vars, save_plots))

cat("Saved", length(plot_vars), "plots (both PNG and PDF) in", save_path, "\n")
cat("Plot dimensions:", plot_width, "x", plot_height, "inches, DPI:", plot_dpi, "\n")