# 1. Import Packages

In [95]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import qiime2 as q2
from qiime2 import Visualization
from scipy.stats import shapiro, kruskal, f_oneway

# 2. Data Directionary

In [96]:
# Location
data_dir = "Project_data/FungalTrait"
! mkdir -p "$data_dir"

In [97]:
# Paths to project inputs
input_table    = "Project_data/Taxonomy/table_filtered.qza"
input_taxonomy = "Project_data/Taxonomy/taxonomy_pretrained.qza"
input_metadata = "Project_data/Metadata/updated_fungut_metadata.tsv"

# 3. Export QIIME2 Artifacts to TSV

In [98]:
# Export taxonomy file
! qiime tools export \
    --input-path "$input_taxonomy" \
    --output-path "$data_dir"

# Export feature table (BIOM)
! qiime tools export \
    --input-path "$input_table" \
    --output-path "$data_dir"

  import pkg_resources
[32mExported Project_data/Taxonomy/taxonomy_pretrained.qza as TSVTaxonomyDirectoryFormat to directory Project_data/FungalTrait[0m
  import pkg_resources
[32mExported Project_data/Taxonomy/table_filtered.qza as BIOMV210DirFmt to directory Project_data/FungalTrait[0m
[0m[?25h

In [99]:
# Convert BIOM -> TSV
biom_path = f"{data_dir}/feature-table.biom"
tsv_path = f"{data_dir}/feature-table.tsv"

! biom convert \
    -i "$biom_path" \
    -o "$tsv_path" \
    --to-tsv

# 4. Build the input table

In [100]:
feature_tsv = f"{data_dir}/feature-table.tsv"

# Load feature table
feature_df = pd.read_csv(
    feature_tsv,
    sep="\t",
    skiprows=[0],      
    index_col=0     
)

feature_df.index.name = "feature_id"
counts_df = feature_df.reset_index() 

print("Feature table shape:", feature_df.shape)
feature_df.head()

Feature table shape: (895, 150)


Unnamed: 0_level_0,ERR5327198,ERR5327199,ERR5327266,ERR5327282,ERR5327284,ERR5327285,ERR5327287,ERR5327288,ERR5327289,ERR5327300,...,ERR5327586,ERR5327587,ERR5327591,ERR5327592,ERR5327596,ERR5327599,ERR5327604,ERR5327605,ERR5327615,ERR5327620
feature_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
b4b4cf0cd970d24a715d6d4ddb2173d5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
d7740a632d92c0dd6324c4131c5d8d34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
edda090236c8af5510b539c4bc7cd634,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
a112f028910db87dfd35a3a916544d74,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
769fdf5d96caec552665c89f6952d08a,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [101]:
tax_tsv     = f"{data_dir}/taxonomy.tsv"

# Load taxonomy table
tax_df = pd.read_csv(
    tax_tsv,
    sep="\t",
    comment="#"
)
tax_df = tax_df.rename(columns={
    "Feature ID": "feature_id",
    "Taxon": "taxonomy"
})

# Standardise column names
tax_df = tax_df.rename(columns={
    "Feature ID": "feature_id",
    "Taxon": "taxonomy"
})

print("Taxonomy table shape:", tax_df.shape)
tax_df.head()

Taxonomy table shape: (993, 3)


Unnamed: 0,feature_id,taxonomy,Confidence
0,f1e00c6f31a5546a15c206010ff3583c,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...,0.737076
1,1ada978c8f3ff08af2668393869257ac,Unassigned,0.369232
2,d43c87d3898407682adec71ba5b27fa4,k__Fungi;p__Ascomycota;c__Eurotiomycetes;o__Eu...,0.793387
3,f931ac9a7305cdd99a02c25a624e5bb8,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...,0.950055
4,21c0d99a18a5bd7179fdac2b3f37ece4,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...,0.999989


In [102]:
feature_ids = set(feature_df.index)
tax_ids     = set(tax_df["feature_id"])

shared_ids = feature_ids & tax_ids
print("Number of IDs in feature table:", len(feature_ids))
print("Number of IDs in taxonomy   :", len(tax_ids))
print("Number of shared IDs        :", len(shared_ids))

Number of IDs in feature table: 895
Number of IDs in taxonomy   : 993
Number of shared IDs        : 895


In [103]:
# Merge counts + taxonomy into one table
fungaltrait_input = counts_df.merge(
    tax_df[["feature_id", "taxonomy"]],
    on="feature_id",
    how="left"
)

# Drop features without taxonomy
fungaltrait_input = fungaltrait_input.dropna(subset=["taxonomy"])

# rename feature_id 
fungaltrait_input = fungaltrait_input.rename(columns={"feature_id": "ASV_ID"})

print("FungalTrait input shape:", fungaltrait_input.shape)
fungaltrait_input.head()

FungalTrait input shape: (895, 152)


Unnamed: 0,ASV_ID,ERR5327198,ERR5327199,ERR5327266,ERR5327282,ERR5327284,ERR5327285,ERR5327287,ERR5327288,ERR5327289,...,ERR5327587,ERR5327591,ERR5327592,ERR5327596,ERR5327599,ERR5327604,ERR5327605,ERR5327615,ERR5327620,taxonomy
0,b4b4cf0cd970d24a715d6d4ddb2173d5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k__Fungi;p__Ascomycota;c__Ascomycota_cls_Incer...
1,d7740a632d92c0dd6324c4131c5d8d34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...
2,edda090236c8af5510b539c4bc7cd634,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...
3,a112f028910db87dfd35a3a916544d74,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k__Fungi;p__Basidiomycota;c__Tremellomycetes;o...
4,769fdf5d96caec552665c89f6952d08a,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...


In [104]:
# Save table
fungaltrait_input_path = f"{data_dir}/fungaltrait_input.txt"
fungaltrait_input.to_csv(fungaltrait_input_path, sep="\t", index=False)

print("Saved FungalTrait input to:", fungaltrait_input_path)

Saved FungalTrait input to: Project_data/FungalTrait/fungaltrait_input.txt


# 5. Extract genus & species from taxonomy

In [111]:
def extract_genus_species(tax_str):
    if pd.isna(tax_str):
        return pd.Series({"genus": np.nan, "species": np.nan})

    parts = [p.strip() for p in tax_str.split(";")]

    # find g__ and s__ entries
    genus = next((p[3:] for p in parts if p.startswith("g__")), np.nan)
    species = next((p[3:] for p in parts if p.startswith("s__")), np.nan)

    # FungalTraits uses 'Genus species' with a space, not underscores
    if isinstance(species, str) and species not in ("", "unassigned", "Unassigned", "s__"):
        species_clean = species.replace("_", " ")
    else:
        species_clean = np.nan

    return pd.Series({"genus": genus, "species": species_clean})

# Apply to the taxonomy column of your merged table
tax_parsed = fungaltrait_input["taxonomy"].apply(extract_genus_species)

# Attach genus & species columns
fungaltrait_input = pd.concat([fungaltrait_input, tax_parsed], axis=1)

print(fungaltrait_input[["ASV_ID", "taxonomy", "genus", "species"]].head())

                             ASV_ID  \
0  b4b4cf0cd970d24a715d6d4ddb2173d5   
1  d7740a632d92c0dd6324c4131c5d8d34   
2  edda090236c8af5510b539c4bc7cd634   
3  a112f028910db87dfd35a3a916544d74   
4  769fdf5d96caec552665c89f6952d08a   

                                            taxonomy  \
0  k__Fungi;p__Ascomycota;c__Ascomycota_cls_Incer...   
1  k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...   
2  k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...   
3  k__Fungi;p__Basidiomycota;c__Tremellomycetes;o...   
4  k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...   

                           genus                          genus  \
0  Ascomycota_gen_Incertae_sedis  Ascomycota_gen_Incertae_sedis   
1                        Dekkera                        Dekkera   
2                     Geotrichum                     Geotrichum   
3            Cutaneotrichosporon            Cutaneotrichosporon   
4                     Clavispora                     Clavispora   

                         

In [113]:
# only keep ASVs with species-level IDs
fungaltrait_input_species = fungaltrait_input.dropna(subset=["species"])

print("Rows before species filter:", fungaltrait_input.shape[0])
print("Rows after species filter :", fungaltrait_input_species.shape[0])

Rows before species filter: 895
Rows after species filter : 657


# 6. Create mapping table for FungalTraits

In [114]:
# ASV ↔ species mapping
asv_species_map = fungaltrait_input_species[["ASV_ID", "genus", "species"]].drop_duplicates()

asv_species_path = f"{data_dir}/fungaltrait_asv_species.tsv"
asv_species_map.to_csv(asv_species_path, sep="\t", index=False)

print("Saved ASV–species map to:", asv_species_path)
asv_species_map.head()

Saved ASV–species map to: Project_data/FungalTrait/fungaltrait_asv_species.tsv


Unnamed: 0,ASV_ID,genus,genus.1,genus.2,genus.3,species,species.1,species.2,species.3
0,b4b4cf0cd970d24a715d6d4ddb2173d5,Ascomycota_gen_Incertae_sedis,Ascomycota_gen_Incertae_sedis,Ascomycota_gen_Incertae_sedis,Ascomycota_gen_Incertae_sedis,Ascomycota sp,Ascomycota sp,Ascomycota sp,Ascomycota sp
1,d7740a632d92c0dd6324c4131c5d8d34,Dekkera,Dekkera,Dekkera,Dekkera,Dekkera bruxellensis,Dekkera bruxellensis,Dekkera bruxellensis,Dekkera bruxellensis
3,a112f028910db87dfd35a3a916544d74,Cutaneotrichosporon,Cutaneotrichosporon,Cutaneotrichosporon,Cutaneotrichosporon,Cutaneotrichosporon mucoides,Cutaneotrichosporon mucoides,Cutaneotrichosporon mucoides,Cutaneotrichosporon mucoides
4,769fdf5d96caec552665c89f6952d08a,Clavispora,Clavispora,Clavispora,Clavispora,Clavispora lusitaniae,Clavispora lusitaniae,Clavispora lusitaniae,Clavispora lusitaniae
5,257defee9fc7099017bafaef525d6766,Veronaea,Veronaea,Veronaea,Veronaea,Veronaea compacta,Veronaea compacta,Veronaea compacta,Veronaea compacta


In [115]:
# Count table (ASVs × samples) with IDs
sample_cols = [c for c in fungaltrait_input_species.columns
               if c not in ["taxonomy", "genus", "species"]]

counts_only = fungaltrait_input_species[sample_cols]

counts_path = f"{data_dir}/fungaltrait_counts.tsv"
counts_only.to_csv(counts_path, sep="\t", index=False)

print("Saved counts table to:", counts_path)
counts_only.head()

Saved counts table to: Project_data/FungalTrait/fungaltrait_counts.tsv


Unnamed: 0,ASV_ID,ERR5327198,ERR5327199,ERR5327266,ERR5327282,ERR5327284,ERR5327285,ERR5327287,ERR5327288,ERR5327289,...,ERR5327586,ERR5327587,ERR5327591,ERR5327592,ERR5327596,ERR5327599,ERR5327604,ERR5327605,ERR5327615,ERR5327620
0,b4b4cf0cd970d24a715d6d4ddb2173d5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,d7740a632d92c0dd6324c4131c5d8d34,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,a112f028910db87dfd35a3a916544d74,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,769fdf5d96caec552665c89f6952d08a,0.0,0.0,0.0,0.0,0.0,0.0,0.0,6.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,257defee9fc7099017bafaef525d6766,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


# 7. FungalTraits

```
library(readr)
library(dplyr)

1) Load your exported tables
asv_species <- read_tsv("fungaltrait_asv_species.tsv")   # ASV_ID, genus, species
asv_counts  <- read_tsv("fungaltrait_counts.tsv")        # ASV_ID + samples

2) Load FungalTraits database (file name may differ)
fungaltraits <- read_tsv("FungalTraits_1.0.txt")

names(fungaltraits)[names(fungaltraits) == "Species"] <- "species"

3) Attach traits to each ASV
asv_with_traits <- asv_species %>%
  left_join(fungaltraits, by = "species")

4) Combine traits with counts
asv_trait_counts <- asv_with_traits %>%
  left_join(asv_counts, by = "ASV_ID")


relative abundance of trophic modes per sample
sample_cols <- colnames(asv_counts)[colnames(asv_counts) != "ASV_ID"]

trophic_by_sample <- asv_trait_counts %>%
  filter(!is.na(Trophic_mode)) %>%     # or the exact column name in FungalTraits
  group_by(Trophic_mode) %>%
  summarise(across(all_of(sample_cols), sum)) %>%
  ungroup()

Convert to relative abundances per sample 
trophic_rel <- trophic_by_sample
trophic_rel[sample_cols] <- apply(trophic_by_sample[sample_cols], 2, function(x) x / sum(x))
```