In [None]:
# Loading libraries:
suppressMessages({ 
    library(plyr)
    library(dplyr)
    library(data.table)
    library(ggplot2)
    library(IRdisplay)
    library(metagenomeSeq)
    library(microbiomeMarker)
    library(phyloseq)
    library(stringr)
    library(tidyverse)
    library(vegan)
})

# Reading KOs table and transforming into a phyloseq object

In [None]:
# Reading and cleaning table (the first file column becomes the row names)
data <- read.table("human_kos_presabs.tsv", header = TRUE, sep = "\t", row.names = 1)

# Drop rows in which every value is zero or NA.
empty_rows <- apply(data, 1, function(x) all(x == 0 | is.na(x)))
data <- data[!empty_rows, , drop = FALSE]

# Keep columns holding at least one non-zero value. `na.rm = TRUE` makes a
# column of only zeros and NAs count as empty (as the row filter does) instead
# of producing an NA column index; `drop = FALSE` keeps a data frame even when
# a single column survives.
nonzero_cols <- apply(data, 2, function(x) any(x != 0, na.rm = TRUE))
data <- data[, nonzero_cols, drop = FALSE]

# NOTE(review): the first remaining column is discarded here; presumably it is
# an annotation column rather than a sample -- confirm against the input file.
otumat <- as.matrix(data[-1])

# Saving data for phyloseq object
OTU <- otu_table(otumat, taxa_are_rows = TRUE)
# Group label = column name with everything up to the first "_" removed.
sampledata <- data.frame(sample_types = sub("^[^_]*_", "", colnames(otumat)))
rownames(sampledata) <- colnames(otumat)
SAM <- sample_data(sampledata)

# Building the phyloseq object
physeq <- phyloseq(OTU, SAM)

In [None]:
# Keep the five groups compared below; for shallowmapping-bwa and shogun only
# the optimal subsampling ("10_bwa", "10_sho") is retained.
physeq_filt <- subset_samples(
  physeq,
  sample_types %in% c("pic", "mfp", "deep", "10_sho", "10_bwa")
)
# Plain data-frame copy of the sample table, used by the tests below.
metadata <- data.frame(sample_data(physeq_filt))

# Statistical analysis of groups comparison for functions

## Global ANOSIM

In [None]:
# ANOSIM of the Jaccard distances between samples, grouped by sample type.
anosim(
  x = phyloseq::distance(physeq_filt, method = "jaccard"),
  grouping = metadata$sample_types
)

## Global betadisper and Adonis2

In [None]:
# Jaccard distance matrix shared by the two tests below.
css_beta <- phyloseq::distance(physeq_filt, method = "jaccard")

# adonis2 of the distances against sample type, 1000 permutations.
ado_result <- adonis2(
  css_beta ~ sample_types,
  data = metadata,
  permutations = 1e3
)
#ado_result

# Multivariate dispersion per group, tested with anova() and then plotted.
bd <- betadisper(d = css_beta, group = metadata$sample_types)
anova(bd)

boxplot(bd)
plot(bd)

## Tukey's HSD test on dispersion for functions

In [None]:
# Tukey's HSD test on the betadisper result `bd`
tukey_table <- TukeyHSD(bd)

# One data frame per component of the Tukey result.
tukey_df_list <- lapply(tukey_table, as.data.frame)

# Move the comparison labels from the row names into a column and tag each row
# with the name of the component it came from.
label_tukey_rows <- function(name) {
  term_df <- tukey_df_list[[name]]
  term_df$Comparison <- rownames(term_df)
  rownames(term_df) <- NULL
  term_df$Factor <- name
  term_df
}

# Stack all components and write them to a single tab-separated file.
tukey_df_combined <- do.call(
  rbind,
  lapply(names(tukey_df_list), label_tukey_rows)
)
write.table(
  tukey_df_combined,
  file = "human_tukey_bd_kos.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)


## Pairwise ANOSIM for functions

In [None]:
# Pairwise ANOSIM (Jaccard) between every pair of sample types in `metadata`.
# Each column of `cbn` holds one pair of sample types.
cbn <- combn(x = unique(metadata$sample_types), m = 2)
n_pairs <- ncol(cbn)
p <- numeric(n_pairs)  # permutation p-value of each pair
R <- numeric(n_pairs)  # ANOSIM R statistic of each pair

for (i in seq_len(n_pairs)) {
    # Subset the filtered object, i.e. the one `metadata` was extracted from.
    ps.subs <- subset_samples(physeq_filt, sample_types %in% cbn[, i])
    metadata_sub <- data.frame(sample_data(ps.subs))
    anosim_pairwise <- anosim(phyloseq::distance(ps.subs, method = "jaccard"),
                              metadata_sub$sample_types)
    p[i] <- anosim_pairwise$signif[1]
    R[i] <- anosim_pairwise$statistic
}

# Benjamini-Hochberg adjustment across all pairs.
p.adj <- p.adjust(p, method = "BH")

# The two sample-type columns come from the unnamed matrix t(cbn); the heatmap
# cell reads them back from the file as X1 and X2, so they are left unnamed.
p.table <- cbind.data.frame(t(cbn), R = R, p = p, p.adj = p.adj)

write.table(p.table, "human_pairwise_anosim_kos.tsv", row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")


### Pairwise ANOSIM heatmap

Criteria for R-value color codes:

R-value Near 0:
R ~ 0.0 to 0.2: This range typically suggests that there is little to no observable difference between the groups. The samples can be considered very similar in composition.

Moderate R-values:
R ~ 0.2 to 0.5: This range suggests moderate differentiation. Samples might be considered somewhat different, but the distinction isn't very strong. Interpretation in this range can depend on the sensitivity required in the study and the natural variability of the dataset.

High R-values (close to 1):
R > 0.5: This range indicates strong differentiation between samples. The higher the R-value, especially approaching or exceeding 0.7, the more distinct the community compositions between the groups. Samples with these R-values can be considered to have different compositions.


In [None]:
library(pheatmap)

In [None]:
# Heatmap of the pairwise ANOSIM R values (upper triangle only).
# read.table() renames the unnamed sample-type columns "1" and "2" to X1 and X2.
data <- read.table("human_pairwise_anosim_kos.tsv", header = TRUE, sep = "\t")
group_a <- as.character(data$X1)
group_b <- as.character(data$X2)
samples <- unique(c(group_a, group_b))
r_matrix <- matrix(NA_real_, nrow = length(samples), ncol = length(samples),
                   dimnames = list(samples, samples))

# Fill both triangles through (row name, column name) index pairs so the
# matrix is symmetric.
r_matrix[cbind(group_a, group_b)] <- data$R
r_matrix[cbind(group_b, group_a)] <- data$R

desired_order <- c("deep", "10_bwa", "10_sho", "mfp", "pic")
r_matrix <- r_matrix[desired_order, desired_order]

# Set the lower triangle to NA
r_matrix[lower.tri(r_matrix)] <- NA

# ANOSIM R can be negative. Values outside the 0-1 break range would be drawn
# in the NA colour and look like missing cells, so clamp them to the ends of
# the colour scale (NA cells stay NA).
r_matrix <- pmin(pmax(r_matrix, 0), 1)

# Define the colors and breaks: 100 colours over 101 break points, split at the
# 0.2 and 0.5 thresholds.
colors <- c(
  colorRampPalette(c("#a80202", "#ff6403"))(20),    # Gradient from 0 to 0.2
  colorRampPalette(c("#ff6403", "#ffda05"))(30),    # Gradient from 0.2 to 0.5
  colorRampPalette(c("#ffda05", "black"))(50)       # Gradient from 0.5 to 1.0
)
breaks <- c(
  seq(0, 0.2, length.out = 21),         # Breaks from 0 to 0.2
  seq(0.2, 0.5, length.out = 31)[-1],   # Breaks from 0.2 to 0.5 (excluding duplicate 0.2)
  seq(0.5, 1.0, length.out = 51)[-1]    # Breaks from 0.5 to 1.0 (excluding duplicate 0.5)
)

# Plot the heatmap
pdf("human_rvalues_kos.pdf", width = 7, height = 5)
pheatmap(r_matrix,
         color = colors,
         breaks = breaks,
         cluster_rows = FALSE,  # Disable clustering to preserve the order
         cluster_cols = FALSE,  # Disable clustering to preserve the order
         show_rownames = TRUE,
         show_colnames = TRUE,
         display_numbers = FALSE,
         na_col = "white")  # Set the color for NA values
dev.off()


# Ordination plots for functions

In [None]:
# Reading and cleaning table (the first file column becomes the row names)
data <- read.table("human_kos_presabs.tsv", header = TRUE, sep = "\t", row.names = 1)

# Drop rows in which every value is zero or NA.
empty_rows <- apply(data, 1, function(x) all(x == 0 | is.na(x)))
data <- data[!empty_rows, , drop = FALSE]

# Keep columns holding at least one non-zero value. `na.rm = TRUE` makes a
# column of only zeros and NAs count as empty instead of producing an NA column
# index; `drop = FALSE` keeps a data frame even when a single column survives.
nonzero_cols <- apply(data, 2, function(x) any(x != 0, na.rm = TRUE))
data <- data[, nonzero_cols, drop = FALSE]

# NOTE(review): the first remaining column is discarded here; presumably it is
# an annotation column rather than a sample -- confirm against the input file.
otumat <- as.matrix(data[-1])

# Saving data for phyloseq object
OTU <- otu_table(otumat, taxa_are_rows = TRUE)
# Group label = column name with everything up to the first "_" removed.
sampledata <- data.frame(sample_types = sub("^[^_]*_", "", colnames(otumat)))
rownames(sampledata) <- colnames(otumat)
SAM <- sample_data(sampledata)

# Building the phyloseq object
physeq <- phyloseq(OTU, SAM)

# Keeping only the optimal subsampling for shallowmapping-bwa and shogun
physeq_filt <- subset_samples(physeq, sample_types %in% c("pic", "mfp", "deep", "10_sho", "10_bwa"))
metadata <- data.frame(sample_data(physeq_filt))


In [None]:
# Fixed colour for each sample type, used by the plotting cells.
colorCode_sample <- c(
  "10_bwa" = "#3CB44B",
  "10_sho" = "#f77a13",
  "deep"   = "#000000",
  "mfp"    = "#780164",
  "pic"    = "#099fe0"
)

# Testing different ordination methods on the same object and distance.
dist <- "jaccard"
ord_meths <- c("DCA", "CCA", "RDA", "MDS", "PCoA")

# Ordinate with one method and return the corresponding samples plot.
build_ordination_plot <- function(method_name, physeq_obj, dist) {
  ordination <- ordinate(physeq_obj, method = method_name, distance = dist)
  plot_ordination(physeq_obj, ordination, "samples")
}
plist <- llply(as.list(ord_meths), build_ordination_plot, physeq_filt, dist)
names(plist) <- ord_meths

# Stack the plot data of all methods; the first two columns of each plot's data
# are repeated under the common names Axis_1 / Axis_2.
pdataframe <- ldply(plist, function(ord_plot) {
  axes <- ord_plot$data[, 1:2]
  colnames(axes) <- c("Axis_1", "Axis_2")
  cbind(axes, ord_plot$data)
})
names(pdataframe)[1] <- "method"

In [None]:
# Saving all the plots to individual pdf files, one per ordination method.
# The loop follows the length of `ord_meths` instead of a hard-coded 1:5, so
# adding or removing a method cannot leave the loop out of sync.
for (index in seq_along(ord_meths)) {
    plot_type <- ord_meths[[index]]
    file_name <- paste0('human_', plot_type, '_kos.pdf')
    pdf(file_name, width = 5, height = 4)
    # NOTE(review): colour is mapped from the external `metadata` vector, which
    # relies on its row order matching the plot data -- confirm.
    ord_plot <- plist[[index]] +
        geom_point(size = 2, alpha = 1, aes(color = metadata$sample_types)) +
        stat_ellipse(level = 0.9, type = "norm", geom = "polygon", alpha = 0,
                     aes(color = metadata$sample_types)) +
        theme_bw() +
        scale_color_manual(values = colorCode_sample) +
        labs(color = "Groups")
    print(ord_plot)
    dev.off()
}

In [None]:
# Displaying the plots with all the methods to choose the best
options(repr.plot.width = 7, repr.plot.height = 6)

# Print every ordination plot in the global list `plist`, coloured by the
# sample types in the global `metadata` with the `colorCode_sample` palette.
# Takes no arguments and is called for its printing side effect only.
print_plots <- function() {
    # Iterate over whatever `plist` holds rather than a hard-coded 1:5.
    for (index in seq_along(plist)) {
        ord_plot <- plist[[index]] +
            geom_point(size = 2, alpha = 1, aes(color = metadata$sample_types)) +
            stat_ellipse(level = 0.9, type = "norm", geom = "polygon", alpha = 0,
                         aes(color = metadata$sample_types)) +
            theme_bw() +
            scale_color_manual(values = colorCode_sample) +
            labs(color = "Groups")
        print(ord_plot)
    }
}
print_plots()

# Statistical analysis of groups comparison for taxonomy

## Groups comparison and ordination plots for taxonomy

In [None]:
# Building a clean phyloseq object and keeping the shallow-shotgun data with the best subsampling only
# Reading table (the first file column becomes the row names)
data <- read.table("human_relab_species.tsv", header = TRUE, sep = "\t", row.names = 1)

# Drop rows in which every value is zero or NA.
empty_rows <- apply(data, 1, function(x) all(x == 0 | is.na(x)))
data <- data[!empty_rows, , drop = FALSE]

# Keep columns holding at least one non-zero value. `na.rm = TRUE` makes a
# column of only zeros and NAs count as empty instead of producing an NA column
# index; `drop = FALSE` keeps a data frame even when a single column survives.
nonzero_cols <- apply(data, 2, function(x) any(x != 0, na.rm = TRUE))
data <- data[, nonzero_cols, drop = FALSE]

# NOTE(review): the first remaining column is discarded here; presumably it is
# an annotation column rather than a sample -- confirm against the input file.
otumat <- as.matrix(data[-1])
OTU <- otu_table(otumat, taxa_are_rows = TRUE)

# Group label = column name with everything up to the first "_" removed.
sampledata <- data.frame(sample_types = sub("^[^_]*_", "", colnames(otumat)))
rownames(sampledata) <- colnames(otumat)

SAM <- sample_data(sampledata)
physeq <- phyloseq(OTU, SAM)

# Keeping only 1M reads samples for shallowmapping-bwa and shogun
physeq_filt <- subset_samples(physeq, sample_types %in% c("amp", "deep", "10_sho", "10_bwa"))
metadata <- data.frame(sample_data(physeq_filt))

### Global betadisper and Adonis2 for taxonomy

In [None]:
# Bray-Curtis distances between the filtered samples.
css_beta <- phyloseq::distance(physeq_filt, method = "bray")

# adonis2 of the distances against sample type, 1000 permutations.
ado_result <- adonis2(
  css_beta ~ sample_types,
  data = metadata,
  permutations = 1e3
)
ado_result

In [None]:
# Multivariate dispersion per sample type on the distances in `css_beta`,
# tested with anova() and then plotted.
bd <- betadisper(d = css_beta, group = metadata$sample_types)
anova(bd)

boxplot(bd)
plot(bd)

### Tukey's HSD test on dispersion for taxonomy (when betadisper global P-val < 0.01)

In [None]:
# Tukey's HSD test on the betadisper result `bd`
tukey_table <- TukeyHSD(bd)

# One data frame per component of the Tukey result.
tukey_df_list <- lapply(tukey_table, as.data.frame)

# Move the comparison labels from the row names into a column and tag each row
# with the name of the component it came from.
label_tukey_rows <- function(name) {
  term_df <- tukey_df_list[[name]]
  term_df$Comparison <- rownames(term_df)
  rownames(term_df) <- NULL
  term_df$Factor <- name
  term_df
}

# Stack all components and write them to a single tab-separated file.
tukey_df_combined <- do.call(
  rbind,
  lapply(names(tukey_df_list), label_tukey_rows)
)
write.table(
  tukey_df_combined,
  file = "human_tukey_bd_species.tsv",
  sep = "\t",
  quote = FALSE,
  row.names = FALSE,
  col.names = TRUE
)


### Pairwise ANOSIM for taxonomy

In [None]:
# Pairwise ANOSIM (Bray-Curtis) between every pair of sample types in `metadata`.
# Each column of `cbn` holds one pair of sample types.
cbn <- combn(x = unique(metadata$sample_types), m = 2)
n_pairs <- ncol(cbn)
p <- numeric(n_pairs)  # permutation p-value of each pair
R <- numeric(n_pairs)  # ANOSIM R statistic of each pair

for (i in seq_len(n_pairs)) {
    ps.subs <- subset_samples(physeq_filt, sample_types %in% cbn[, i])
    metadata_sub <- data.frame(sample_data(ps.subs))
    anosim_pairwise <- anosim(phyloseq::distance(ps.subs, method = "bray"),
                              metadata_sub$sample_types)
    p[i] <- anosim_pairwise$signif[1]
    R[i] <- anosim_pairwise$statistic
}

# Benjamini-Hochberg adjustment across all pairs.
p.adj <- p.adjust(p, method = "BH")

# The two sample-type columns come from the unnamed matrix t(cbn); the heatmap
# cell reads them back from the file as X1 and X2, so they are left unnamed.
p.table <- cbind.data.frame(t(cbn), R = R, p = p, p.adj = p.adj)

write.table(p.table, "human_pairwise_anosim_species.tsv", row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")

### Heatmap of pairwise ANOSIM for taxonomy

In [None]:
library(pheatmap)

In [None]:
# Heatmap of the pairwise ANOSIM R values (upper triangle only).
# read.table() renames the unnamed sample-type columns "1" and "2" to X1 and X2.
data <- read.table("human_pairwise_anosim_species.tsv", header = TRUE, sep = "\t")
group_a <- as.character(data$X1)
group_b <- as.character(data$X2)
samples <- unique(c(group_a, group_b))
r_matrix <- matrix(NA_real_, nrow = length(samples), ncol = length(samples),
                   dimnames = list(samples, samples))

# Fill both triangles through (row name, column name) index pairs so the
# matrix is symmetric.
r_matrix[cbind(group_a, group_b)] <- data$R
r_matrix[cbind(group_b, group_a)] <- data$R

desired_order <- c("deep", "10_bwa", "10_sho", "amp")
r_matrix <- r_matrix[desired_order, desired_order]

# Set the lower triangle to NA
r_matrix[lower.tri(r_matrix)] <- NA

# ANOSIM R can be negative. Values outside the 0-1 break range would be drawn
# in the NA colour and look like missing cells, so clamp them to the ends of
# the colour scale (NA cells stay NA).
r_matrix <- pmin(pmax(r_matrix, 0), 1)

# Define the colors and breaks: 100 colours over 101 break points, split at the
# 0.2 and 0.5 thresholds.
colors <- c(
  colorRampPalette(c("#a80202", "#ff6403"))(20),    # Gradient from 0 to 0.2
  colorRampPalette(c("#ff6403", "#ffda05"))(30),    # Gradient from 0.2 to 0.5
  colorRampPalette(c("#ffda05", "black"))(50)       # Gradient from 0.5 to 1.0
)
breaks <- c(
  seq(0, 0.2, length.out = 21),         # Breaks from 0 to 0.2
  seq(0.2, 0.5, length.out = 31)[-1],   # Breaks from 0.2 to 0.5 (excluding duplicate 0.2)
  seq(0.5, 1.0, length.out = 51)[-1]    # Breaks from 0.5 to 1.0 (excluding duplicate 0.5)
)

# Plot the heatmap
pdf("human_rvalues_species.pdf", width = 7, height = 5)
pheatmap(r_matrix,
         color = colors,
         breaks = breaks,
         cluster_rows = FALSE,  # Disable clustering to preserve the order
         cluster_cols = FALSE,  # Disable clustering to preserve the order
         show_rownames = TRUE,
         show_colnames = TRUE,
         display_numbers = FALSE,
         na_col = "white")  # Set the color for NA values
dev.off()


### Ordination plots for taxonomy

In [None]:
# Building a clean phyloseq object and keeping the shallow-shotgun data with the best subsampling only
# Reading table (the first file column becomes the row names)
data <- read.table("human_relab_species.tsv", header = TRUE, sep = "\t", row.names = 1)

# Drop rows in which every value is zero or NA.
empty_rows <- apply(data, 1, function(x) all(x == 0 | is.na(x)))
data <- data[!empty_rows, , drop = FALSE]

# Keep columns holding at least one non-zero value. `na.rm = TRUE` makes a
# column of only zeros and NAs count as empty instead of producing an NA column
# index; `drop = FALSE` keeps a data frame even when a single column survives.
nonzero_cols <- apply(data, 2, function(x) any(x != 0, na.rm = TRUE))
data <- data[, nonzero_cols, drop = FALSE]

# NOTE(review): the first remaining column is discarded here; presumably it is
# an annotation column rather than a sample -- confirm against the input file.
otumat <- as.matrix(data[-1])
OTU <- otu_table(otumat, taxa_are_rows = TRUE)

# Group label = column name with everything up to the first "_" removed.
sampledata <- data.frame(sample_types = sub("^[^_]*_", "", colnames(otumat)))
rownames(sampledata) <- colnames(otumat)

SAM <- sample_data(sampledata)
physeq <- phyloseq(OTU, SAM)

# Keeping only 1M reads samples for shallowmapping-bwa and shogun
physeq_filt <- subset_samples(physeq, sample_types %in% c("amp", "deep", "10_sho", "10_bwa"))
metadata <- data.frame(sample_data(physeq_filt))

In [None]:
# Fixed colour for each sample type, used by the plotting cells.
colorCode_sample <- c(
  "10_bwa" = "#3CB44B",
  "10_sho" = "#f77a13",
  "deep"   = "#000000",
  "amp"    = "#3402c9"
)

# Testing different ordination methods on the same object and distance.
dist <- "bray"
ord_meths <- c("DCA", "CCA", "RDA", "MDS", "PCoA")

# Ordinate with one method and return the corresponding samples plot.
build_ordination_plot <- function(method_name, physeq_obj, dist) {
  ordination <- ordinate(physeq_obj, method = method_name, distance = dist)
  plot_ordination(physeq_obj, ordination, "samples")
}
plist <- llply(as.list(ord_meths), build_ordination_plot, physeq_filt, dist)
names(plist) <- ord_meths

# Stack the plot data of all methods; the first two columns of each plot's data
# are repeated under the common names Axis_1 / Axis_2.
pdataframe <- ldply(plist, function(ord_plot) {
  axes <- ord_plot$data[, 1:2]
  colnames(axes) <- c("Axis_1", "Axis_2")
  cbind(axes, ord_plot$data)
})
names(pdataframe)[1] <- "method"


In [None]:
# Saving all the plots to individual pdf files, one per ordination method.
# The loop follows the length of `ord_meths` instead of a hard-coded 1:5, so
# adding or removing a method cannot leave the loop out of sync.
for (index in seq_along(ord_meths)) {
    plot_type <- ord_meths[[index]]
    file_name <- paste0('human_', plot_type, '_species.pdf')
    pdf(file_name, width = 5, height = 4)
    # NOTE(review): colour is mapped from the external `metadata` vector, which
    # relies on its row order matching the plot data -- confirm.
    ord_plot <- plist[[index]] +
        geom_point(size = 2, alpha = 1, aes(color = metadata$sample_types)) +
        stat_ellipse(level = 0.9, type = "norm", geom = "polygon", alpha = 0,
                     aes(color = metadata$sample_types)) +
        theme_bw() +
        scale_color_manual(values = colorCode_sample) +
        labs(color = "Groups")
    print(ord_plot)
    dev.off()
}

In [None]:
# Displaying the plots with all the methods to find the best
options(repr.plot.width = 7, repr.plot.height = 6)

# Print every ordination plot in the global list `plist`, coloured by the
# sample types in the global `metadata` with the `colorCode_sample` palette.
# Takes no arguments and is called for its printing side effect only.
print_plots <- function() {
    # Iterate over whatever `plist` holds rather than a hard-coded 1:5.
    for (index in seq_along(plist)) {
        ord_plot <- plist[[index]] +
            geom_point(size = 2, alpha = 1, aes(color = metadata$sample_types)) +
            stat_ellipse(level = 0.9, type = "norm", geom = "polygon", alpha = 0,
                         aes(color = metadata$sample_types)) +
            theme_bw() +
            scale_color_manual(values = colorCode_sample) +
            labs(color = "Groups")
        print(ord_plot)
    }
}
print_plots()


# Analysis of sample groups in Healthy cohort 1 (TMAO paper)

## Features profiles analysis based on high and low TMAO producers

### Taxonomic annotation at species level analysis

In [None]:
# Building a clean phyloseq object for taxonomic annotations
data <- read.table("tmao_taxo_matrix.tsv", header = TRUE, sep = "\t", row.names = 1)
# Drop rows that are entirely zero/NA, then columns without any non-zero value.
# `na.rm = TRUE` keeps a zeros-and-NAs column from producing an NA column index,
# and `drop = FALSE` keeps a data frame when a single column is left.
empty_rows <- apply(data, 1, function(x) all(x == 0 | is.na(x)))
data <- data[!empty_rows, , drop = FALSE]
nonzero_cols <- apply(data, 2, function(x) any(x != 0, na.rm = TRUE))
data <- data[, nonzero_cols, drop = FALSE]
# NOTE(review): the first remaining column is dropped; presumably it is an
# annotation column rather than a sample -- confirm against the input file.
otumat <- as.matrix(data[-1])
OTU <- otu_table(otumat, taxa_are_rows = TRUE)

# Building metadata object with groups formation info (all individuals):
# a sample is "High" when the second "_"-separated field of its name contains
# "HP", otherwise "Low".
second_elements <- vapply(strsplit(colnames(otumat), "_"), function(x) x[2], character(1))
sampledata <- data.frame(sample_types = ifelse(grepl("HP", second_elements), "High", "Low"))
rownames(sampledata) <- colnames(otumat)
#head(sampledata, 15)

SAM <- sample_data(sampledata)
physeq <- phyloseq(OTU, SAM)
metadata <- data.frame(sample_data(physeq))

# Bray-Curtis distances, computed once and shared by the three tests below.
css_beta <- phyloseq::distance(physeq, method = "bray")

# Global ANOSIM all individuals
anosim(css_beta, metadata$sample_types)

# Global ADONIS all individuals
ado_result <- adonis2(css_beta ~ sample_types, data = metadata, permutations = 1e3, na.rm = TRUE)
ado_result

# Betadisper all individuals
bd <- betadisper(css_beta, metadata$sample_types)
anova(bd)

options(repr.plot.width = 10, repr.plot.height = 10)
plot(bd)

In [None]:
# Separating vegetarian and omnivores by the first letter of the sample name
# and repeating the High/Low tests (Bray-Curtis) within each subset.
sample_data_df <- sample_data(physeq)

# ---- Vegetarian: sample names starting with 'V' ----
physeq_veg <- subset_samples(physeq, grepl("^V", rownames(sample_data_df)))
metadata_veg <- data.frame(sample_data(physeq_veg))

# ANOSIM vegetarian
anosim(
  x = phyloseq::distance(physeq_veg, method = "bray"),
  grouping = metadata_veg$sample_types
)

# ADONIS vegetarian
v_css_beta <- phyloseq::distance(physeq_veg, method = "bray")
v_ado_result <- adonis2(
  v_css_beta ~ sample_types,
  data = metadata_veg,
  permutations = 1e3,
  na.rm = TRUE
)
v_ado_result

# Betadisper vegetarian
v_bd <- betadisper(d = v_css_beta, group = metadata_veg$sample_types)
anova(v_bd)
plot(v_bd)

# ---- Omnivores: sample names starting with 'O' ----
physeq_omn <- subset_samples(physeq, grepl("^O", rownames(sample_data_df)))
metadata_omn <- data.frame(sample_data(physeq_omn))

# ANOSIM omnivores
anosim(
  x = phyloseq::distance(physeq_omn, method = "bray"),
  grouping = metadata_omn$sample_types
)

# ADONIS omnivores
o_css_beta <- phyloseq::distance(physeq_omn, method = "bray")
o_ado_result <- adonis2(
  o_css_beta ~ sample_types,
  data = metadata_omn,
  permutations = 1e3,
  na.rm = TRUE
)
o_ado_result

# Betadisper omnivores
o_bd <- betadisper(d = o_css_beta, group = metadata_omn$sample_types)
anova(o_bd)
plot(o_bd)

### Functional annotation KOs

In [None]:
# Building a clean phyloseq object for KOs annotations
data <- read.table("tmao_kos_matrix.tsv", header = TRUE, sep = "\t", row.names = 1)
# Drop rows that are entirely zero/NA, then columns without any non-zero value.
# `na.rm = TRUE` keeps a zeros-and-NAs column from producing an NA column index,
# and `drop = FALSE` keeps a data frame when a single column is left.
empty_rows <- apply(data, 1, function(x) all(x == 0 | is.na(x)))
data <- data[!empty_rows, , drop = FALSE]
nonzero_cols <- apply(data, 2, function(x) any(x != 0, na.rm = TRUE))
data <- data[, nonzero_cols, drop = FALSE]
# NOTE(review): the first remaining column is dropped; presumably it is an
# annotation column rather than a sample -- confirm against the input file.
otumat <- as.matrix(data[-1])
OTU <- otu_table(otumat, taxa_are_rows = TRUE)

# Building metadata object with groups formation info (all individuals):
# a sample is "High" when the second "_"-separated field of its name contains
# "HP", otherwise "Low".
second_elements <- vapply(strsplit(colnames(otumat), "_"), function(x) x[2], character(1))
sampledata <- data.frame(sample_types = ifelse(grepl("HP", second_elements), "High", "Low"))
rownames(sampledata) <- colnames(otumat)
#head(sampledata, 15)

SAM <- sample_data(sampledata)
physeq <- phyloseq(OTU, SAM)
metadata <- data.frame(sample_data(physeq))

# Normalisation by cumulative sum scaling (CSS)
physeq_CSS <- normalize(physeq, method = "CSS")

# Bray-Curtis distances on the CSS-normalised table, computed once and shared
# by the three tests below.
css_beta <- phyloseq::distance(physeq_CSS, method = "bray")

# ANOSIM all individuals
anosim(css_beta, metadata$sample_types)

# ADONIS all individuals
ado_result <- adonis2(css_beta ~ sample_types, data = metadata, permutations = 1e3, na.rm = TRUE)
ado_result

# Betadisper all individuals
bd <- betadisper(css_beta, metadata$sample_types)
anova(bd)

options(repr.plot.width = 10, repr.plot.height = 10)
plot(bd)

In [None]:
# Separating vegetarian and omnivores by the first letter of the sample name
# and repeating the High/Low tests within each subset. Both subsets use
# Bray-Curtis distances on the CSS-normalised table so their results are
# comparable with each other and with the all-individuals analysis (the
# omnivore branch previously used "jaccard").
sample_data_df <- sample_data(physeq)

# Subset phyloseq_veg for samples with names starting with 'V'
physeq_veg <- subset_samples(physeq, grepl("^V", rownames(sample_data_df)))
metadata_veg <- data.frame(sample_data(physeq_veg))

# Normalisation by cumulative sum scaling (CSS)
physeq_CSS_veg <- normalize(physeq_veg, method = "CSS")

# Distances computed once and shared by the three vegetarian tests.
v_css_beta <- phyloseq::distance(physeq_CSS_veg, method = "bray")

# ANOSIM vegetarian
anosim(v_css_beta, metadata_veg$sample_types)

# ADONIS vegetarian
v_ado_result <- adonis2(v_css_beta ~ sample_types, data = metadata_veg, permutations = 1e3, na.rm = TRUE)
v_ado_result

# Betadisper vegetarian
v_bd <- betadisper(v_css_beta, metadata_veg$sample_types)
anova(v_bd)
plot(v_bd)

# Subset phyloseq_omn for samples with names starting with 'O'
physeq_omn <- subset_samples(physeq, grepl("^O", rownames(sample_data_df)))
metadata_omn <- data.frame(sample_data(physeq_omn))

# Normalisation by cumulative sum scaling (CSS)
physeq_CSS_omn <- normalize(physeq_omn, method = "CSS")

# Distances computed once and shared by the three omnivore tests.
o_css_beta <- phyloseq::distance(physeq_CSS_omn, method = "bray")

# ANOSIM omnivores
anosim(o_css_beta, metadata_omn$sample_types)

# ADONIS omnivores
o_ado_result <- adonis2(o_css_beta ~ sample_types, data = metadata_omn, permutations = 1e3, na.rm = TRUE)
o_ado_result

# Betadisper omnivores
o_bd <- betadisper(o_css_beta, metadata_omn$sample_types)
anova(o_bd)
plot(o_bd)

### Functional annotation Pfams

In [None]:
# Building a clean phyloseq object for Pfam annotations
data <- read.table("tmao_pfam_matrix.tsv", header = TRUE, sep = "\t", row.names = 1)
# Drop rows that are entirely zero/NA, then columns without any non-zero value.
# `na.rm = TRUE` keeps a zeros-and-NAs column from producing an NA column index,
# and `drop = FALSE` keeps a data frame when a single column is left.
empty_rows <- apply(data, 1, function(x) all(x == 0 | is.na(x)))
data <- data[!empty_rows, , drop = FALSE]
nonzero_cols <- apply(data, 2, function(x) any(x != 0, na.rm = TRUE))
data <- data[, nonzero_cols, drop = FALSE]
# NOTE(review): the first remaining column is dropped; presumably it is an
# annotation column rather than a sample -- confirm against the input file.
otumat <- as.matrix(data[-1])
OTU <- otu_table(otumat, taxa_are_rows = TRUE)

# Building metadata object with groups formation info (all individuals):
# a sample is "High" when the second "_"-separated field of its name contains
# "HP", otherwise "Low".
second_elements <- vapply(strsplit(colnames(otumat), "_"), function(x) x[2], character(1))
sampledata <- data.frame(sample_types = ifelse(grepl("HP", second_elements), "High", "Low"))
rownames(sampledata) <- colnames(otumat)
#head(sampledata, 15)

SAM <- sample_data(sampledata)
physeq <- phyloseq(OTU, SAM)
metadata <- data.frame(sample_data(physeq))

# Normalisation by cumulative sum scaling (CSS)
physeq_CSS <- normalize(physeq, method = "CSS")

# Bray-Curtis distances on the CSS-normalised table, computed once and shared
# by the three tests below.
css_beta <- phyloseq::distance(physeq_CSS, method = "bray")

# ANOSIM all individuals
anosim(css_beta, metadata$sample_types)

# ADONIS all individuals
ado_result <- adonis2(css_beta ~ sample_types, data = metadata, permutations = 1e3, na.rm = TRUE)
ado_result

# Betadisper all individuals
bd <- betadisper(css_beta, metadata$sample_types)
anova(bd)

options(repr.plot.width = 10, repr.plot.height = 10)
plot(bd)

In [None]:
# Separating vegetarian and omnivores by the first letter of the sample name
# and repeating the High/Low tests (Bray-Curtis on CSS-normalised data) within
# each subset.
sample_data_df <- sample_data(physeq)

# ---- Vegetarian: sample names starting with 'V' ----
physeq_veg <- subset_samples(physeq, grepl("^V", rownames(sample_data_df)))
metadata_veg <- data.frame(sample_data(physeq_veg))

# Normalisation by cumulative sum scaling (CSS)
physeq_CSS_veg <- normalize(physeq_veg, method = "CSS")

# ANOSIM vegetarian
anosim(
  x = phyloseq::distance(physeq_CSS_veg, method = "bray"),
  grouping = metadata_veg$sample_types
)

# ADONIS vegetarian
v_css_beta <- phyloseq::distance(physeq_CSS_veg, method = "bray")
v_ado_result <- adonis2(
  v_css_beta ~ sample_types,
  data = metadata_veg,
  permutations = 1e3,
  na.rm = TRUE
)
v_ado_result

# Betadisper vegetarian
v_bd <- betadisper(d = v_css_beta, group = metadata_veg$sample_types)
anova(v_bd)
plot(v_bd)

# ---- Omnivores: sample names starting with 'O' ----
physeq_omn <- subset_samples(physeq, grepl("^O", rownames(sample_data_df)))
metadata_omn <- data.frame(sample_data(physeq_omn))

# Normalisation by cumulative sum scaling (CSS)
physeq_CSS_omn <- normalize(physeq_omn, method = "CSS")

# ANOSIM omnivores
anosim(
  x = phyloseq::distance(physeq_CSS_omn, method = "bray"),
  grouping = metadata_omn$sample_types
)

# ADONIS omnivores
o_css_beta <- phyloseq::distance(physeq_CSS_omn, method = "bray")
o_ado_result <- adonis2(
  o_css_beta ~ sample_types,
  data = metadata_omn,
  permutations = 1e3,
  na.rm = TRUE
)
o_ado_result

# Betadisper omnivores
o_bd <- betadisper(d = o_css_beta, group = metadata_omn$sample_types)
anova(o_bd)
plot(o_bd)

## Features profiles analysis based on L-carnitine intervention on Healthy cohort 1 (all, omnivores, vegetarian)

### Taxonomic annotation at species level analysis

In [None]:
# Building a clean phyloseq object for taxonomic annotations
data <- read.table("tmao_taxo_matrix.tsv", header = TRUE, sep = "\t", row.names = 1)
# Drop rows that are entirely zero/NA, then columns without any non-zero value.
# `na.rm = TRUE` keeps a zeros-and-NAs column from producing an NA column index,
# and `drop = FALSE` keeps a data frame when a single column is left.
empty_rows <- apply(data, 1, function(x) all(x == 0 | is.na(x)))
data <- data[!empty_rows, , drop = FALSE]
nonzero_cols <- apply(data, 2, function(x) any(x != 0, na.rm = TRUE))
data <- data[, nonzero_cols, drop = FALSE]
# NOTE(review): the first remaining column is dropped; presumably it is an
# annotation column rather than a sample -- confirm against the input file.
otumat <- as.matrix(data[-1])
OTU <- otu_table(otumat, taxa_are_rows = TRUE)

# Building metadata object with groups formation info (all individuals):
# a sample is "Post" when the second "_"-separated field of its name starts
# with "C", otherwise "Pre".
second_elements <- vapply(strsplit(colnames(otumat), "_"), function(x) x[2], character(1))
sampledata <- data.frame(sample_types = ifelse(grepl("^C", second_elements), "Post", "Pre"))
rownames(sampledata) <- colnames(otumat)
#head(sampledata, 3)

SAM <- sample_data(sampledata)
physeq <- phyloseq(OTU, SAM)
metadata <- data.frame(sample_data(physeq))

# Bray-Curtis distances, computed once and shared by the three tests below.
css_beta <- phyloseq::distance(physeq, method = "bray")

# ANOSIM all individuals. This must use the object and metadata built in this
# cell; it previously used `physeq_veg` / `metadata_veg`, which are left over
# from an earlier cell and carry that cell's grouping.
anosim(css_beta, metadata$sample_types)

# ADONIS all individuals
ado_result <- adonis2(css_beta ~ sample_types, data = metadata, permutations = 1e3, na.rm = TRUE)
ado_result

# Betadisper all individuals
bd <- betadisper(css_beta, metadata$sample_types)
anova(bd)

options(repr.plot.width = 10, repr.plot.height = 10)
plot(bd)

In [None]:
# Separating vegetarian and omnivores by the first letter of the sample name
# and repeating the Pre/Post tests (Bray-Curtis) within each subset.
sample_data_df <- sample_data(physeq)

# ---- Vegetarian: sample names starting with 'V' ----
physeq_veg <- subset_samples(physeq, grepl("^V", rownames(sample_data_df)))
metadata_veg <- data.frame(sample_data(physeq_veg))

# ANOSIM vegetarian
anosim(
  x = phyloseq::distance(physeq_veg, method = "bray"),
  grouping = metadata_veg$sample_types
)

# ADONIS vegetarian
v_css_beta <- phyloseq::distance(physeq_veg, method = "bray")
v_ado_result <- adonis2(
  v_css_beta ~ sample_types,
  data = metadata_veg,
  permutations = 1e3,
  na.rm = TRUE
)
v_ado_result

# Betadisper vegetarian
v_bd <- betadisper(d = v_css_beta, group = metadata_veg$sample_types)
anova(v_bd)
plot(v_bd)

# ---- Omnivores: sample names starting with 'O' ----
physeq_omn <- subset_samples(physeq, grepl("^O", rownames(sample_data_df)))
metadata_omn <- data.frame(sample_data(physeq_omn))

# ANOSIM omnivores
anosim(
  x = phyloseq::distance(physeq_omn, method = "bray"),
  grouping = metadata_omn$sample_types
)

# ADONIS omnivores
o_css_beta <- phyloseq::distance(physeq_omn, method = "bray")
o_ado_result <- adonis2(
  o_css_beta ~ sample_types,
  data = metadata_omn,
  permutations = 1e3,
  na.rm = TRUE
)
o_ado_result

# Betadisper omnivores
o_bd <- betadisper(d = o_css_beta, group = metadata_omn$sample_types)
anova(o_bd)
plot(o_bd)


### Functional annotation KOs

In [None]:
# Building a clean phyloseq object for KOs annotations
data <- read.table("tmao_kos_matrix.tsv", header = TRUE, sep = "\t", row.names = 1)
# Drop rows that are entirely zero/NA, then columns without any non-zero value.
# `na.rm = TRUE` keeps a zeros-and-NAs column from producing an NA column index,
# and `drop = FALSE` keeps a data frame when a single column is left.
empty_rows <- apply(data, 1, function(x) all(x == 0 | is.na(x)))
data <- data[!empty_rows, , drop = FALSE]
nonzero_cols <- apply(data, 2, function(x) any(x != 0, na.rm = TRUE))
data <- data[, nonzero_cols, drop = FALSE]
# NOTE(review): the first remaining column is dropped; presumably it is an
# annotation column rather than a sample -- confirm against the input file.
otumat <- as.matrix(data[-1])
OTU <- otu_table(otumat, taxa_are_rows = TRUE)

# Building metadata object with groups formation info (all individuals):
# a sample is "Post" when the second "_"-separated field of its name starts
# with "C", otherwise "Pre".
second_elements <- vapply(strsplit(colnames(otumat), "_"), function(x) x[2], character(1))
sampledata <- data.frame(sample_types = ifelse(grepl("^C", second_elements), "Post", "Pre"))
rownames(sampledata) <- colnames(otumat)
#head(sampledata, 3)

SAM <- sample_data(sampledata)
physeq <- phyloseq(OTU, SAM)
metadata <- data.frame(sample_data(physeq))

# Normalisation by cumulative sum scaling (CSS)
physeq_CSS <- normalize(physeq, method = "CSS")

# Bray-Curtis distances on the CSS-normalised table, computed once and shared
# by the three tests below.
css_beta <- phyloseq::distance(physeq_CSS, method = "bray")

# ANOSIM all individuals
anosim(css_beta, metadata$sample_types)

# ADONIS all individuals
ado_result <- adonis2(css_beta ~ sample_types, data = metadata, permutations = 1e3, na.rm = TRUE)
ado_result

# Betadisper all individuals
bd <- betadisper(css_beta, metadata$sample_types)
anova(bd)
plot(bd)
                          

In [None]:
# Separating vegetarian and omnivores by the first letter of the sample name
# and repeating the Pre/Post tests (Bray-Curtis on CSS-normalised data) within
# each subset.
sample_data_df <- sample_data(physeq)

# ---- Vegetarian: sample names starting with 'V' ----
physeq_veg <- subset_samples(physeq, grepl("^V", rownames(sample_data_df)))
metadata_veg <- data.frame(sample_data(physeq_veg))

# Normalisation by cumulative sum scaling (CSS)
physeq_CSS_veg <- normalize(physeq_veg, method = "CSS")

# ANOSIM vegetarian
anosim(
  x = phyloseq::distance(physeq_CSS_veg, method = "bray"),
  grouping = metadata_veg$sample_types
)

# ADONIS vegetarian
v_css_beta <- phyloseq::distance(physeq_CSS_veg, method = "bray")
v_ado_result <- adonis2(
  v_css_beta ~ sample_types,
  data = metadata_veg,
  permutations = 1e3,
  na.rm = TRUE
)
v_ado_result

# Betadisper vegetarian
v_bd <- betadisper(d = v_css_beta, group = metadata_veg$sample_types)
anova(v_bd)
plot(v_bd)

# ---- Omnivores: sample names starting with 'O' ----
physeq_omn <- subset_samples(physeq, grepl("^O", rownames(sample_data_df)))
metadata_omn <- data.frame(sample_data(physeq_omn))

# Normalisation by cumulative sum scaling (CSS)
physeq_CSS_omn <- normalize(physeq_omn, method = "CSS")

# ANOSIM omnivores
anosim(
  x = phyloseq::distance(physeq_CSS_omn, method = "bray"),
  grouping = metadata_omn$sample_types
)

# ADONIS omnivores
o_css_beta <- phyloseq::distance(physeq_CSS_omn, method = "bray")
o_ado_result <- adonis2(
  o_css_beta ~ sample_types,
  data = metadata_omn,
  permutations = 1e3,
  na.rm = TRUE
)
o_ado_result

# Betadisper omnivores
o_bd <- betadisper(d = o_css_beta, group = metadata_omn$sample_types)
anova(o_bd)
plot(o_bd)


### Functional annotation Pfams

In [None]:
# Building a clean phyloseq object for Pfam annotations
data <- read.table("tmao_pfam_matrix.tsv", header = TRUE, sep = "\t", row.names = 1)
# Drop rows that are entirely zero/NA, then columns without any non-zero value.
# `na.rm = TRUE` keeps a zeros-and-NAs column from producing an NA column index,
# and `drop = FALSE` keeps a data frame when a single column is left.
empty_rows <- apply(data, 1, function(x) all(x == 0 | is.na(x)))
data <- data[!empty_rows, , drop = FALSE]
nonzero_cols <- apply(data, 2, function(x) any(x != 0, na.rm = TRUE))
data <- data[, nonzero_cols, drop = FALSE]
# NOTE(review): the first remaining column is dropped; presumably it is an
# annotation column rather than a sample -- confirm against the input file.
otumat <- as.matrix(data[-1])
OTU <- otu_table(otumat, taxa_are_rows = TRUE)

# Building metadata object with groups formation info (all individuals):
# a sample is "Post" when the second "_"-separated field of its name starts
# with "C", otherwise "Pre".
second_elements <- vapply(strsplit(colnames(otumat), "_"), function(x) x[2], character(1))
sampledata <- data.frame(sample_types = ifelse(grepl("^C", second_elements), "Post", "Pre"))
rownames(sampledata) <- colnames(otumat)
#head(sampledata, 3)

SAM <- sample_data(sampledata)
physeq <- phyloseq(OTU, SAM)
metadata <- data.frame(sample_data(physeq))

# Normalisation by cumulative sum scaling (CSS)
physeq_CSS <- normalize(physeq, method = "CSS")

# ANOSIM all individuals (left disabled, as in the original cell)
#anosim(phyloseq::distance(physeq_CSS, method="bray"), metadata$sample_types)

# ADONIS all individuals
css_beta <- phyloseq::distance(physeq_CSS, method = "bray")
ado_result <- adonis2(css_beta ~ sample_types, data = metadata, permutations = 1e3, na.rm = TRUE)
#ado_result

# Betadisper all individuals
bd <- betadisper(css_beta, metadata$sample_types)
anova(bd)
plot(bd)


In [None]:
# Split vegetarians from omnivores using the sample-name prefix.
# The sample data of the full object is extracted once; its row names are
# what the subsetting expressions match against.
sample_data_df <- sample_data(physeq)

# Vegetarians: samples whose names begin with "V"
physeq_veg <- subset_samples(
  physeq,
  grepl("^V", rownames(sample_data_df))
)
metadata_veg <- data.frame(sample_data(physeq_veg))

# Cumulative sum scaling (CSS) of the vegetarian subset
physeq_CSS_veg <- normalize(physeq_veg, method = "CSS")

# ANOSIM between sample_types groups on Bray-Curtis distances
anosim(
  phyloseq::distance(physeq_CSS_veg, method = "bray"),
  metadata_veg$sample_types
)

# adonis2 (PERMANOVA) on the same distances, 1e3 permutations
v_css_beta <- distance(physeq_CSS_veg, method = "bray")
v_ado_result <- adonis2(
  v_css_beta ~ sample_types,
  data = metadata_veg,
  permutations = 1e3,
  # NOTE(review): na.rm is forwarded via `...`; confirm it is actually used
  # when the response is a precomputed distance object.
  na.rm = TRUE
)
v_ado_result

# Group dispersions (betadisper), ANOVA on them, and the default plot
v_bd <- betadisper(v_css_beta, metadata_veg$"sample_types")
anova(v_bd)
plot(v_bd)

# Omnivores: samples whose names begin with "O".
# Uses `sample_data_df`, extracted from `physeq` earlier in this cell.
physeq_omn <- subset_samples(
  physeq,
  grepl("^O", rownames(sample_data_df))
)
metadata_omn <- data.frame(sample_data(physeq_omn))

# Cumulative sum scaling (CSS) of the omnivore subset
physeq_CSS_omn <- normalize(physeq_omn, method = "CSS")

# ANOSIM between sample_types groups on Bray-Curtis distances
anosim(
  phyloseq::distance(physeq_CSS_omn, method = "bray"),
  metadata_omn$sample_types
)

# adonis2 (PERMANOVA) on the same distances, 1e3 permutations
o_css_beta <- distance(physeq_CSS_omn, method = "bray")
o_ado_result <- adonis2(
  o_css_beta ~ sample_types,
  data = metadata_omn,
  permutations = 1e3,
  # NOTE(review): na.rm is forwarded via `...`; confirm it is actually used
  # when the response is a precomputed distance object.
  na.rm = TRUE
)
o_ado_result

# Group dispersions (betadisper), ANOVA on them, and the default plot
o_bd <- betadisper(o_css_beta, metadata_omn$"sample_types")
anova(o_bd)
plot(o_bd)


## Differentially abundant features (species and KOs) between high and low TMAO producer groups using MaAsLin 3
### All individuals (species)

In [None]:
# Load the species table; the first column of the file becomes the row names
# and the remaining column names are treated as sample identifiers below.
data <- read.table(
  "tmao_taxo_matrix.tsv",
  header = TRUE,
  sep = "\t",
  row.names = 1
)
# Drop rows in which every entry is zero or NA
data <- data[!apply(data, 1, function(vals) all(vals == 0 | is.na(vals))), ]
# Keep only the columns that contain at least one non-zero entry
data <- data[, apply(data, 2, function(vals) any(vals != 0))]

# Sample metadata for all individuals: the group label comes from the second
# "_"-separated field of each column name ("High" if it contains "HP",
# otherwise "Low").
second_elements <- vapply(
  strsplit(colnames(data), "_"),
  function(parts) parts[2],
  character(1)
)
sampledata <- data.frame(
  sample_types = ifelse(grepl("HP", second_elements), "High", "Low")
)
rownames(sampledata) <- colnames(data)
# Every sample is assigned the same read count
sampledata$reads <- 1e6

In [None]:
# Fit MaAsLin 3 on the species table against the High/Low grouping.
# The RNG seed is fixed before the fit.
set.seed(1)
# The species run gets its own output directory: the KOs run further down
# writes to 'maaslin3_output' and would otherwise overwrite these results.
# NOTE(review): `reads` is set to the same value for every sample in the
# metadata built above, so this term cannot vary between samples; confirm
# that keeping it in the formula is intended.
fit_out <- maaslin3(
  input_data = data,
  input_metadata = sampledata,
  output = "maaslin3_output_species",
  formula = "~ sample_types + reads",
  max_pngs = 20
)

### All individuals (KOs)

In [None]:
# Load the KOs table; the first column of the file becomes the row names
# and the remaining column names are treated as sample identifiers below.
data <- read.table(
  "tmao_kos_matrix.tsv",
  header = TRUE,
  sep = "\t",
  row.names = 1
)
# Drop rows in which every entry is zero or NA
data <- data[!apply(data, 1, function(vals) all(vals == 0 | is.na(vals))), ]
# Keep only the columns that contain at least one non-zero entry
data <- data[, apply(data, 2, function(vals) any(vals != 0))]

# Sample metadata for all individuals: the group label comes from the second
# "_"-separated field of each column name ("High" if it contains "HP",
# otherwise "Low").
second_elements <- vapply(
  strsplit(colnames(data), "_"),
  function(parts) parts[2],
  character(1)
)
sampledata <- data.frame(
  sample_types = ifelse(grepl("HP", second_elements), "High", "Low")
)
rownames(sampledata) <- colnames(data)
# Every sample is assigned the same read count
sampledata$reads <- 1e6

In [None]:
# Seed the RNG, then fit MaAsLin 3 on the KOs table and the High/Low
# metadata built in the previous cell. Results go to 'maaslin3_output'.
set.seed(1)
fit_out <- maaslin3(
  input_data = data,
  input_metadata = sampledata,
  formula = "~ sample_types + reads",
  output = "maaslin3_output",
  max_pngs = 20
)