# 1. Import Packages

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import qiime2 as q2
from qiime2 import Visualization
from scipy.stats import shapiro, kruskal, f_oneway

# 2. Data Directionary

In [2]:
# Location
data_dir = "Project_data/FungalTrait"
! mkdir -p "$data_dir"

In [3]:
# Paths to project inputs
input_table    = "Project_data/Taxonomy/table_filtered.qza"
input_taxonomy = "Project_data/Taxonomy/taxonomy_pretrained.qza"
input_metadata = "Project_data/Metadata/updated_fungut_metadata.tsv"

# 3. Export QIIME2 Artifacts to TSV

In [22]:
# Export taxonomy file
! qiime tools export \
    --input-path "$input_taxonomy" \
    --output-path "$data_dir"

# Export feature table (BIOM)
! qiime tools export \
    --input-path "$input_table" \
    --output-path "$data_dir"

  import pkg_resources
[32mExported Project_data/Taxonomy/taxonomy_pretrained.qza as TSVTaxonomyDirectoryFormat to directory Project_data/FungalTrial[0m
  import pkg_resources
[32mExported Project_data/Taxonomy/table_filtered.qza as BIOMV210DirFmt to directory Project_data/FungalTrial[0m
[0m[?25h

In [23]:
# Convert BIOM -> TSV
biom_path = f"{data_dir}/feature-table.biom"
tsv_path = f"{data_dir}/feature-table.tsv"

! biom convert \
    -i "$biom_path" \
    -o "$tsv_path" \
    --to-tsv

# 4. Build the input table

In [34]:
feature_tsv = f"{data_dir}/feature-table.tsv"

# Load feature table
feature_df = pd.read_csv(
    feature_tsv,
    sep="\t",
    skiprows=[0],      
    index_col=0     
)

feature_df.index.name = "feature_id"
counts_df = feature_df.reset_index() 

print("Feature table shape:", feature_df.shape)
feature_df.head()

Feature table shape: (895, 150)


Unnamed: 0_level_0,ERR5327198,ERR5327199,ERR5327266,ERR5327282,ERR5327284,ERR5327285,ERR5327287,ERR5327288,ERR5327289,ERR5327300,...,ERR5327586,ERR5327587,ERR5327591,ERR5327592,ERR5327596,ERR5327599,ERR5327604,ERR5327605,ERR5327615,ERR5327620
feature_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
85984bb07b9a865c7fcd38cc22da75ee,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3fced2f51416171e2b78fea746e25c75,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
36fb54d19585adaedc1e6c3185e10647,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
524888a74906e1255dc6108606921757,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
e94c66ea3c4c033ce916645a7431b243,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [35]:
tax_tsv     = f"{data_dir}/taxonomy.tsv"

# Load taxonomy table
tax_df = pd.read_csv(
    tax_tsv,
    sep="\t",
    comment="#"
)
tax_df = tax_df.rename(columns={
    "Feature ID": "feature_id",
    "Taxon": "taxonomy"
})

# Standardise column names
tax_df = tax_df.rename(columns={
    "Feature ID": "feature_id",
    "Taxon": "taxonomy"
})

print("Taxonomy table shape:", tax_df.shape)
tax_df.head()

Taxonomy table shape: (734, 3)


Unnamed: 0,feature_id,taxonomy,Confidence
0,f872ab159e2219de905e49b556b85d05,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...,0.999972
1,d3b20e3fa30a65662fc3a6e0057d6e90,k__Fungi;p__Ascomycota;c__Ascomycota_cls_Incer...,0.999984
2,333df8f222ab30bfdffd5b7d3c5a789b,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...,0.918087
3,beba691fc7ee5c7219589a54ec45b0bd,k__Fungi;p__Ascomycota;c__Eurotiomycetes;o__Eu...,0.841928
4,5e7d76c82d92bc95d366c4c5796d3b21,Unassigned,0.380525


In [36]:
feature_ids = set(feature_df.index)
tax_ids     = set(tax_df["feature_id"])

shared_ids = feature_ids & tax_ids
print("Number of IDs in feature table:", len(feature_ids))
print("Number of IDs in taxonomy   :", len(tax_ids))
print("Number of shared IDs        :", len(shared_ids))

Number of IDs in feature table: 895
Number of IDs in taxonomy   : 734
Number of shared IDs        : 0


In [37]:
# Merge counts + taxonomy into one table
fungaltrait_input = counts_df.merge(
    tax_df[["feature_id", "taxonomy"]],
    on="feature_id",
    how="left"
)

# Drop features without taxonomy
fungaltrait_input = fungaltrait_input.dropna(subset=["taxonomy"])

# rename feature_id 
fungaltrait_input = fungaltrait_input.rename(columns={"feature_id": "ASV_ID"})

print("FungalTrait input shape:", fungaltrait_input.shape)
fungaltrait_input.head()

FungalTrial input shape: (0, 152)


Unnamed: 0,ASV_ID,ERR5327198,ERR5327199,ERR5327266,ERR5327282,ERR5327284,ERR5327285,ERR5327287,ERR5327288,ERR5327289,...,ERR5327587,ERR5327591,ERR5327592,ERR5327596,ERR5327599,ERR5327604,ERR5327605,ERR5327615,ERR5327620,taxonomy


In [None]:
# Save table
fungaltrait_input_path = f"{data_dir}/fungaltrait_input.txt"
fungaltrait_input.to_csv(fungaltrait_input_path, sep="\t", index=False)

print("Saved FungalTrait input to:", fungaltrait_inputt_path)

# 5. Extract genus & species from taxonomy

In [None]:
def extract_genus_species(tax_str):
    if pd.isna(tax_str):
        return pd.Series({"genus": np.nan, "species": np.nan})

    parts = [p.strip() for p in tax_str.split(";")]

    # find g__ and s__ entries
    genus = next((p[3:] for p in parts if p.startswith("g__")), np.nan)
    species = next((p[3:] for p in parts if p.startswith("s__")), np.nan)

    # FungalTraits uses 'Genus species' with a space, not underscores
    if isinstance(species, str) and species not in ("", "unassigned", "Unassigned", "s__"):
        species_clean = species.replace("_", " ")
    else:
        species_clean = np.nan

    return pd.Series({"genus": genus, "species": species_clean})

# Apply to the taxonomy column of your merged table
tax_parsed = fungaltrait_input["taxonomy"].apply(extract_genus_species)

# Attach genus & species columns
fungaltrait_input = pd.concat([fungaltrait_input, tax_parsed], axis=1)

print(fungaltrait_input[["ASV_ID", "taxonomy", "genus", "species"]].head())

In [None]:
# only keep ASVs with species-level IDs
fungaltrait_input_species = fungaltrait_input_input.dropna(subset=["species"])

print("Rows before species filter:", fungaltrait_input.shape[0])
print("Rows after species filter :", fungaltrait_inputt_species.shape[0])

# 6. Create mapping table for FungalTraits

In [None]:
# ASV ↔ species mapping
asv_species_map = fungaltrait_input_species[["ASV_ID", "genus", "species"]].drop_duplicates()

asv_species_path = f"{data_dir}/fungaltrait_asv_species.tsv"
asv_species_map.to_csv(asv_species_path, sep="\t", index=False)

print("Saved ASV–species map to:", asv_species_path)
asv_species_map.head()

In [None]:
# Count table (ASVs × samples) with IDs
sample_cols = [c for c in fungaltrait_input_species.columns
               if c not in ["taxonomy", "genus", "species"]]

counts_only = fungaltrait_input_species[sample_cols]

counts_path = f"{data_dir}/fungaltrait_counts.tsv"
counts_only.to_csv(counts_path, sep="\t", index=False)

print("Saved counts table to:", counts_path)
counts_only.head()

# 7. FungalTraits

```
library(readr)
library(dplyr)

1) Load your exported tables
asv_species <- read_tsv("fungaltrait_asv_species.tsv")   # ASV_ID, genus, species
asv_counts  <- read_tsv("fungaltrait_counts.tsv")        # ASV_ID + samples

2) Load FungalTraits database (file name may differ)
fungaltraits <- read_tsv("FungalTraits_1.0.txt")

names(fungaltraits)[names(fungaltraits) == "Species"] <- "species"

3) Attach traits to each ASV
asv_with_traits <- asv_species %>%
  left_join(fungaltraits, by = "species")

4) Combine traits with counts
asv_trait_counts <- asv_with_traits %>%
  left_join(asv_counts, by = "ASV_ID")


relative abundance of trophic modes per sample
sample_cols <- colnames(asv_counts)[colnames(asv_counts) != "ASV_ID"]

trophic_by_sample <- asv_trait_counts %>%
  filter(!is.na(Trophic_mode)) %>%     # or the exact column name in FungalTraits
  group_by(Trophic_mode) %>%
  summarise(across(all_of(sample_cols), sum)) %>%
  ungroup()

Convert to relative abundances per sample 
trophic_rel <- trophic_by_sample
trophic_rel[sample_cols] <- apply(trophic_by_sample[sample_cols], 2, function(x) x / sum(x))
```

In [4]:
%load_ext rpy2.ipython

In [9]:
%%R
install.packages("readr")
install.packages("dplyr")
install.packages("reshape2")
install.packages("ggplot2")
install.packages("tidyr")
install.packages("tidyr")
install.packages("svglite")

R[write to console]: trying URL 'https://stat.ethz.ch/CRAN/src/contrib/readr_2.1.6.tar.gz'

R[write to console]: Content type 'application/x-gzip'
R[write to console]:  length 299193 bytes (292 KB)

R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]: =
R[write to console]

x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/cpp11/include' -I'/opt/conda/lib/R/library/tzdb/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /opt/conda/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1714471492496/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/opt/conda=/usr/local/src/conda-prefix  -c Collector.cpp -o Collector.o
x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/cpp11/include' -I'/opt/conda/lib/R/library/tzdb/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-in

installing to /opt/conda/lib/R/library/00LOCK-readr/00new/readr/libs
** R
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
*** copying figures
** building package indices
** installing vignettes
** testing if installed package can be loaded from temporary location
** checking absolute paths in shared objects and dynamic libraries
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (readr)
R[write to console]: 

R[write to console]: 
R[write to console]: The downloaded source packages are in
	‘/tmp/RtmpwWqBWz/downloaded_packages’
R[write to console]: 
R[write to console]: 

R[write to console]: Updating HTML index of packages in '.Library'

R[write to console]: Making 'packages.html' ...
R[write to console]:  done

R[write to console]: trying URL 'https://stat.ethz.ch/CRAN/src/contrib/dplyr_1.1.4.tar.gz'

R[write to console]: Content type 'ap

x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG   -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /opt/conda/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1714471492496/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/opt/conda=/usr/local/src/conda-prefix  -c chop.cpp -o chop.o
x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG   -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /opt/conda/include -

installing to /opt/conda/lib/R/library/00LOCK-dplyr/00new/dplyr/libs
** R
** data
*** moving datasets to lazyload DB
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
*** copying figures
** building package indices
** installing vignettes
** testing if installed package can be loaded from temporary location
** checking absolute paths in shared objects and dynamic libraries
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (dplyr)
R[write to console]: 

R[write to console]: 
R[write to console]: The downloaded source packages are in
	‘/tmp/RtmpwWqBWz/downloaded_packages’
R[write to console]: 
R[write to console]: 

R[write to console]: Updating HTML index of packages in '.Library'

R[write to console]: Making 'packages.html' ...
R[write to console]:  done

R[write to console]: trying URL 'https://stat.ethz.ch/CRAN/src/contrib/reshape2_1.4.5.

x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/Rcpp/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /opt/conda/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1714471492496/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/opt/conda=/usr/local/src/conda-prefix  -c RcppExports.cpp -o RcppExports.o
x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/Rcpp/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fP

installing to /opt/conda/lib/R/library/00LOCK-reshape2/00new/reshape2/libs
** R
** data
*** moving datasets to lazyload DB
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
** building package indices
** testing if installed package can be loaded from temporary location
** checking absolute paths in shared objects and dynamic libraries
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (reshape2)
R[write to console]: 

R[write to console]: 
R[write to console]: The downloaded source packages are in
	‘/tmp/RtmpwWqBWz/downloaded_packages’
R[write to console]: 
R[write to console]: 

R[write to console]: Updating HTML index of packages in '.Library'

R[write to console]: Making 'packages.html' ...
R[write to console]:  done

R[write to console]: trying URL 'https://stat.ethz.ch/CRAN/src/contrib/ggplot2_4.0.1.tar.gz'

R[write to console]: Conten

x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/cpp11/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /opt/conda/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1714471492496/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/opt/conda=/usr/local/src/conda-prefix  -c cpp11.cpp -o cpp11.o
x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/cpp11/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack

installing to /opt/conda/lib/R/library/00LOCK-tidyr/00new/tidyr/libs
** R
** data
*** moving datasets to lazyload DB
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
*** copying figures
** building package indices
** installing vignettes
** testing if installed package can be loaded from temporary location
** checking absolute paths in shared objects and dynamic libraries
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (tidyr)
R[write to console]: 

R[write to console]: 
R[write to console]: The downloaded source packages are in
	‘/tmp/RtmpwWqBWz/downloaded_packages’
R[write to console]: 
R[write to console]: 

R[write to console]: Updating HTML index of packages in '.Library'

R[write to console]: Making 'packages.html' ...
R[write to console]:  done

R[write to console]: trying URL 'https://stat.ethz.ch/CRAN/src/contrib/tidyr_1.3.1.tar

x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/cpp11/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /opt/conda/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1714471492496/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/opt/conda=/usr/local/src/conda-prefix  -c cpp11.cpp -o cpp11.o
x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/cpp11/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack

installing to /opt/conda/lib/R/library/00LOCK-tidyr/00new/tidyr/libs
** R
** data
*** moving datasets to lazyload DB
** inst
** byte-compile and prepare package for lazy loading
** help
*** installing help indices
*** copying figures
** building package indices
** installing vignettes
** testing if installed package can be loaded from temporary location
** checking absolute paths in shared objects and dynamic libraries
** testing if installed package can be loaded from final location
** testing if installed package keeps a record of temporary installation path
* DONE (tidyr)
R[write to console]: 

R[write to console]: 
R[write to console]: The downloaded source packages are in
	‘/tmp/RtmpwWqBWz/downloaded_packages’
R[write to console]: 
R[write to console]: 

R[write to console]: Updating HTML index of packages in '.Library'

R[write to console]: Making 'packages.html' ...
R[write to console]:  done

R[write to console]: also installing the dependency ‘systemfonts’


R[write to console

Using PKG_CFLAGS=
Using PKG_LIBS=-lfontconfig -lfreetype
rm -f systemfonts.so caches.o cpp11.o dev_metrics.o font_matching.o font_local.o font_variation.o font_registry.o ft_cache.o string_shape.o font_metrics.o font_outlines.o font_fallback.o string_metrics.o emoji.o cache_store.o init.o unix/FontManagerLinux.o
x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/cpp11/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /opt/conda/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1714471492496/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/opt/conda=/usr/local/src/conda-prefix  -c caches.cpp -o caches.o


** libs
using C++ compiler: ‘x86_64-conda-linux-gnu-c++ (conda-forge gcc 15.1.0-5) 15.1.0’
In file included from caches.h:6,
                 from caches.cpp:1:
FontDescriptor.h:8:10: fatal error: ft2build.h: No such file or directory
    8 | #include <ft2build.h>
      |          ^~~~~~~~~~~~
compilation terminated.
make: *** [/opt/conda/lib/R/etc/Makeconf:200: caches.o] Error 1
ERROR: compilation failed for package ‘systemfonts’
* removing ‘/opt/conda/lib/R/library/systemfonts’
* restoring previous ‘/opt/conda/lib/R/library/systemfonts’
* installing *source* package ‘svglite’ ...
** package ‘svglite’ successfully unpacked and MD5 sums checked
** using staged installation
** libs
using C++ compiler: ‘x86_64-conda-linux-gnu-c++ (conda-forge gcc 15.1.0-5) 15.1.0’


x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/cpp11/include' -I'/opt/conda/lib/R/library/systemfonts/include' -I'/opt/conda/lib/R/library/textshaping/include' -DNDEBUG -D_FORTIFY_SOURCE=2 -O2 -isystem /opt/conda/include -I/opt/conda/include -Wl,-rpath-link,/opt/conda/lib    -fpic  -fvisibility-inlines-hidden  -fmessage-length=0 -march=nocona -mtune=haswell -ftree-vectorize -fPIC -fstack-protector-strong -fno-plt -O2 -ffunction-sections -pipe -isystem /opt/conda/include -fdebug-prefix-map=/home/conda/feedstock_root/build_artifacts/r-base-split_1714471492496/work=/usr/local/src/conda/r-base-4.3.3 -fdebug-prefix-map=/opt/conda=/usr/local/src/conda-prefix  -c SvgStream.cpp -o SvgStream.o
x86_64-conda-linux-gnu-c++ -std=gnu++17 -I"/opt/conda/lib/R/include" -DNDEBUG  -I'/opt/conda/lib/R/library/cpp11/include' -I'/opt/conda/lib/R/library/systemfonts/include' -I'/opt/conda/lib/R/library/textshaping/include' -DNDEBUG -D_FORTIFY_SOURCE

devSVG.cpp: In function 'void svg_text(double, double, const char*, double, double, pGEcontext, pDevDesc)':
devSVG.cpp:992:22: error: 'get_glyph_path' was not declared in this scope
  992 |         (*stream) << get_glyph_path(
      |                      ^~~~~~~~~~~~~~
devSVG.cpp: In function 'void svg_glyph(int, int*, double*, double*, SEXP, double, int, double, pDevDesc)':
devSVG.cpp:1781:21: error: 'get_glyph_path' was not declared in this scope
 1781 |     std::string p = get_glyph_path(
      |                     ^~~~~~~~~~~~~~
In file included from /opt/conda/lib/R/library/cpp11/include/cpp11/R.hpp:20,
                 from /opt/conda/lib/R/library/cpp11/include/cpp11/list.hpp:5,
                 from devSVG.cpp:24:
devSVG.cpp:1793:29: error: 'get_glyph_raster' was not declared in this scope
 1793 |       SEXP raster = PROTECT(get_glyph_raster(
      |                             ^~~~~~~~~~~~~~~~
/opt/conda/lib/R/include/Rinternals.h:368:36: note: in definition of macro 'PROTEC

In [12]:
%%R
### FungalTraits

# 0. Packages
library(readr)
library(dplyr)
library(reshape2)
library(ggplot2)
library(tidyr)
#library(svglite)


# 1. Load data

# ASV ↔ species mapping (ASV_ID, genus, species)
asv_species <- read_tsv("Project_data/FungalTrait/fungaltrait_asv_species.tsv")

# Count table (ASV_ID + sample columns)
asv_counts  <- read_tsv("Project_data/FungalTrait/fungaltrait_counts.tsv")

# Metadata
metadata    <- read_tsv("Project_data/Metadata/updated_fungut_metadata.tsv")

head(asv_species)
head(asv_counts)[, 1:5]




# 2. Load FungalTrait database

# FungalTraits table downloaded from Google Sheets as CSV
fungaltraits <- read_csv("Project_data/FungalTrait/FungalTraits_v1.2.csv")

# Clean column names a bit: no spaces, consistent case
names(fungaltraits) <- gsub("\\s+", "_", names(fungaltraits))

# Make sure there is a column called "species"
if ("Species" %in% names(fungaltraits) && !"species" %in% names(fungaltraits)) {
  fungaltraits <- fungaltraits |> rename(species = Species)
}

# Same for genus if necessary (GENUS → genus)
if ("GENUS" %in% names(fungaltraits) && !"genus" %in% names(fungaltraits)) {
  fungaltraits <- fungaltraits |> rename(genus = GENUS)
}

# Have a look:
head(fungaltraits)



# 3. Attach traits to each ASV (species-level join)

# Join by species name: every ASV gets the traits of its species
asv_with_traits <- asv_species %>%
  left_join(fungaltraits, by = "genus")

# Check: do we see trait columns attached?
head(asv_with_traits[, c("ASV_ID", "genus", "species", "primary_lifestyle")])



# 4. Merges traits with the ASV abundance counts
asv_trait_counts <- asv_with_traits %>%
  left_join(asv_counts, by = "ASV_ID")

# Check
head(asv_trait_counts[, c("ASV_ID", "genus", "primary_lifestyle", "ERR5327198")])



# 5. Compute Lifestyle Abundance per Sample

# keep only columns that are numeric abundance values
sample_cols <- names(asv_trait_counts)[sapply(asv_trait_counts, is.numeric)]

head(sample_cols)
length(sample_cols)

# Summarize abundance per lifestyle
trophic_by_sample <- asv_trait_counts %>%
  filter(!is.na(primary_lifestyle)) %>%
  group_by(primary_lifestyle) %>%
  summarise(across(all_of(sample_cols), sum)) %>%
  ungroup()

# Absolute counts per lifestyle per sample
head(trophic_by_sample[, 1:6])

# Convert to relative abundance
trophic_rel <- trophic_by_sample
trophic_rel[sample_cols] <- apply(trophic_rel[sample_cols], 2, function(x) x / sum(x))

# Relative abundance per lifestyle per sample
head(trophic_rel[, 1:6])

# transpose: samples as rows, lifestyles as columns
trophic_rel_t <- t(trophic_rel[sample_cols])
colnames(trophic_rel_t) <- trophic_rel$primary_lifestyle
trophic_rel_t <- as.data.frame(trophic_rel_t)

# add sample IDs as a column
trophic_rel_t$SampleID <- rownames(trophic_rel_t)

# Check
head(trophic_rel_t)



# 6. Attach Sample Metadata
colnames(metadata)
metadata <- metadata %>%
  rename(SampleID = ID)

trophic_final <- trophic_rel_t %>%
  left_join(metadata, by = "SampleID")

# Check
head(trophic_final)



# 7. Visualizations

# Select the lifestyle columns
meta_cols <- colnames(metadata)
lifestyle_cols <- setdiff(colnames(trophic_final),
                          c("SampleID", meta_cols))

lifestyle_cols

trophic_long <- trophic_final %>%
  select(SampleID, all_of(lifestyle_cols)) %>%
  pivot_longer(
    cols      = all_of(lifestyle_cols),
    names_to  = "Lifestyle",
    values_to = "RelAbundance"
  ) %>%
  left_join(metadata, by = "SampleID")


# IBD group
trophic_group <- trophic_long %>% 
  group_by(ibd_sample, Lifestyle) %>%           
  summarise(mean_rel = mean(RelAbundance),
            .groups = "drop")

plot_ibd <- ggplot(trophic_group,
       aes(x = ibd_sample, y = mean_rel, fill = Lifestyle)) +
  geom_col(position = "fill") +              
  ylab("Mean relative abundance") +
  xlab("Environment (IBD)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Gluten Status
trophic_group_2 <- trophic_long %>% 
  group_by(gluten_sample, Lifestyle) %>%           
  summarise(mean_rel = mean(RelAbundance),
            .groups = "drop")

plot_gluten <- ggplot(trophic_group_2,
       aes(x = gluten_sample, y = mean_rel, fill = Lifestyle)) +
  geom_col(position = "fill") +              
  ylab("Mean relative abundance") +
  xlab("Environment (Gluten Status)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))


# Diet
trophic_group_3 <- trophic_long %>% 
  group_by(diet_type_sample, Lifestyle) %>%           
  summarise(mean_rel = mean(RelAbundance),
            .groups = "drop")

plot_diet <- ggplot(trophic_group_3,
       aes(x = diet_type_sample, y = mean_rel, fill = Lifestyle)) +
  geom_col(position = "fill") +              
  ylab("Mean relative abundance") +
  xlab("Environment (Diet Type)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Gender
trophic_group_4 <- trophic_long %>% 
  group_by(sex_sample, Lifestyle) %>%           
  summarise(mean_rel = mean(RelAbundance),
            .groups = "drop")

plot_sex <- ggplot(trophic_group_4,
       aes(x = sex_sample, y = mean_rel, fill = Lifestyle)) +
  geom_col(position = "fill") +              
  ylab("Mean relative abundance") +
  xlab("Environment (Gender)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# BMI
trophic_group_5 <- trophic_long %>% 
  group_by(bmi_category, Lifestyle) %>%           
  summarise(mean_rel = mean(RelAbundance),
            .groups = "drop")

plot_bmi <- ggplot(trophic_group_5,
       aes(x = bmi_category, y = mean_rel, fill = Lifestyle)) +
  geom_col(position = "fill") +              
  ylab("Mean relative abundance") +
  xlab("Environment (BMI)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Continent
trophic_group_6 <- trophic_long %>% 
  group_by(continent, Lifestyle) %>%           
  summarise(mean_rel = mean(RelAbundance),
            .groups = "drop")

plot_continent <- ggplot(trophic_group_6,
       aes(x = continent, y = mean_rel, fill = Lifestyle)) +
  geom_col(position = "fill") +              
  ylab("Mean relative abundance") +
  xlab("Environment (Continent)") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))



# 8. Statistical testing

# metadata variables you analysed in Step 7
meta_vars <- c("ibd_sample", "gluten_sample", "diet_type_sample",
               "sex_sample", "bmi_category", "continent")

# all lifestyle columns
lifestyle_cols <- setdiff(colnames(trophic_final),
                          c("SampleID", colnames(metadata)))

# Kruskal–Wallis
run_kw <- function(meta_var) {
  results <- lapply(lifestyle_cols, function(lf) {
    
    # remove samples with missing metadata
    df <- trophic_final %>%
      filter(!is.na(.data[[meta_var]]))
    
    test <- kruskal.test(df[[lf]] ~ df[[meta_var]])
    
    data.frame(
      Lifestyle = lf,
      Metadata  = meta_var,
      p_value   = test$p.value
    )
  })
  
  do.call(rbind, results)
}

kw_results <- lapply(meta_vars, run_kw)
kw_results <- do.call(rbind, kw_results)

# adjust for multiple testing
kw_results$padj <- p.adjust(kw_results$p_value, method = "fdr")

print(kw_results)

# significant results
subset(kw_results, padj < 0.05)



# 9. Export Plots
dir.create("figures", showWarnings = FALSE)

#ggsave("figures/plot_ibd.svg",       plot_ibd,       width = 8, height = 5)
#ggsave("figures/plot_gluten.svg",    plot_gluten,    width = 8, height = 5)
#ggsave("figures/plot_diet.svg",      plot_diet,      width = 8, height = 5)
#ggsave("figures/plot_sex.svg",       plot_sex,       width = 8, height = 5)
#ggsave("figures/plot_bmi.svg",       plot_bmi,       width = 8, height = 5)
#ggsave("figures/plot_continent.svg", plot_continent, width = 8, height = 5)



Rows: 657 Columns: 3
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (3): ASV_ID, genus, species

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 657 Columns: 151
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr   (1): ASV_ID
dbl (150): ERR5327198, ERR5327199, ERR5327266, ERR5327282, ERR5327284, ERR53...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Rows: 150 Columns: 16
── Column specification ────────────────────────────────────────────────────────
Delimiter: "\t"
chr (10): ID, country_sample, state_sample, sex_sample, diet_type_sample, ib...
dbl  (6): latitude_sample, longitude_sample, age_years_sample, height_cm_sam...

ℹ Use `spec()` to retrieve the full column