# 1. Import Packages

In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import qiime2 as q2
from qiime2 import Visualization
from scipy.stats import shapiro, kruskal, f_oneway

# 2. Data Directionary

In [3]:
# Location
data_dir = "Project_data/FungalTrait"
! mkdir -p "$data_dir"

In [4]:
# Paths to project inputs
input_table    = "Project_data/Taxonomy/table_filtered.qza"
input_taxonomy = "Project_data/Taxonomy/taxonomy_pretrained.qza"
input_metadata = "Project_data/Metadata/updated_fungut_metadata.tsv"

# 3. Export QIIME2 Artifacts to TSV

In [5]:
# Export taxonomy file
! qiime tools export \
    --input-path "$input_taxonomy" \
    --output-path "$data_dir"

# Export feature table (BIOM)
! qiime tools export \
    --input-path "$input_table" \
    --output-path "$data_dir"

  import pkg_resources
[32mExported Project_data/Taxonomy/taxonomy_pretrained.qza as TSVTaxonomyDirectoryFormat to directory Project_data/FungalTrait[0m
  import pkg_resources
[32mExported Project_data/Taxonomy/table_filtered.qza as BIOMV210DirFmt to directory Project_data/FungalTrait[0m
[0m[?25h

In [6]:
# Convert BIOM -> TSV
biom_path = f"{data_dir}/feature-table.biom"
tsv_path = f"{data_dir}/feature-table.tsv"

! biom convert \
    -i "$biom_path" \
    -o "$tsv_path" \
    --to-tsv

# 4. Build the input table

In [7]:
feature_tsv = f"{data_dir}/feature-table.tsv"

# Load feature table
feature_df = pd.read_csv(
    feature_tsv,
    sep="\t",
    skiprows=[0],      
    index_col=0     
)

feature_df.index.name = "feature_id"
counts_df = feature_df.reset_index() 

print("Feature table shape:", feature_df.shape)
feature_df.head()

Feature table shape: (895, 150)


Unnamed: 0_level_0,ERR5327198,ERR5327199,ERR5327266,ERR5327282,ERR5327284,ERR5327285,ERR5327287,ERR5327288,ERR5327289,ERR5327300,...,ERR5327586,ERR5327587,ERR5327591,ERR5327592,ERR5327596,ERR5327599,ERR5327604,ERR5327605,ERR5327615,ERR5327620
feature_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
b0290b01e6c7d848349196cd2cef76d6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
a3c79ca386d6d925149ffc73009ad47a,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
d2518782d51468e37876a8e7d442bbb7,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7a609d6d6c4c0c32e7645928b3f90f85,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3317e6ac5c82a334f021023077df633d,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [8]:
tax_tsv     = f"{data_dir}/taxonomy.tsv"

# Load taxonomy table
tax_df = pd.read_csv(
    tax_tsv,
    sep="\t",
    comment="#"
)
tax_df = tax_df.rename(columns={
    "Feature ID": "feature_id",
    "Taxon": "taxonomy"
})

# Standardise column names
tax_df = tax_df.rename(columns={
    "Feature ID": "feature_id",
    "Taxon": "taxonomy"
})

print("Taxonomy table shape:", tax_df.shape)
tax_df.head()

Taxonomy table shape: (734, 3)


Unnamed: 0,feature_id,taxonomy,Confidence
0,f872ab159e2219de905e49b556b85d05,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...,0.999972
1,d3b20e3fa30a65662fc3a6e0057d6e90,k__Fungi;p__Ascomycota;c__Ascomycota_cls_Incer...,0.999984
2,333df8f222ab30bfdffd5b7d3c5a789b,k__Fungi;p__Ascomycota;c__Saccharomycetes;o__S...,0.918087
3,beba691fc7ee5c7219589a54ec45b0bd,k__Fungi;p__Ascomycota;c__Eurotiomycetes;o__Eu...,0.841928
4,5e7d76c82d92bc95d366c4c5796d3b21,Unassigned,0.380525


In [9]:
feature_ids = set(feature_df.index)
tax_ids     = set(tax_df["feature_id"])

shared_ids = feature_ids & tax_ids
print("Number of IDs in feature table:", len(feature_ids))
print("Number of IDs in taxonomy   :", len(tax_ids))
print("Number of shared IDs        :", len(shared_ids))

Number of IDs in feature table: 895
Number of IDs in taxonomy   : 734
Number of shared IDs        : 0


In [10]:
# Merge counts + taxonomy into one table
fungaltrait_input = counts_df.merge(
    tax_df[["feature_id", "taxonomy"]],
    on="feature_id",
    how="left"
)

# Drop features without taxonomy
fungaltrait_input = fungaltrait_input.dropna(subset=["taxonomy"])

# rename feature_id 
fungaltrait_input = fungaltrait_input.rename(columns={"feature_id": "ASV_ID"})

print("FungalTrait input shape:", fungaltrait_input.shape)
fungaltrait_input.head()

FungalTrait input shape: (0, 152)


Unnamed: 0,ASV_ID,ERR5327198,ERR5327199,ERR5327266,ERR5327282,ERR5327284,ERR5327285,ERR5327287,ERR5327288,ERR5327289,...,ERR5327587,ERR5327591,ERR5327592,ERR5327596,ERR5327599,ERR5327604,ERR5327605,ERR5327615,ERR5327620,taxonomy


In [11]:
# Save table
fungaltrait_input_path = f"{data_dir}/fungaltrait_input.txt"
fungaltrait_input.to_csv(fungaltrait_input_path, sep="\t", index=False)

print("Saved FungalTrait input to:", fungaltrait_input_path)

NameError: name 'fungaltrait_inputt_path' is not defined

# 5. Extract genus & species from taxonomy

In [None]:
def extract_genus_species(tax_str):
    if pd.isna(tax_str):
        return pd.Series({"genus": np.nan, "species": np.nan})

    parts = [p.strip() for p in tax_str.split(";")]

    # find g__ and s__ entries
    genus = next((p[3:] for p in parts if p.startswith("g__")), np.nan)
    species = next((p[3:] for p in parts if p.startswith("s__")), np.nan)

    # FungalTraits uses 'Genus species' with a space, not underscores
    if isinstance(species, str) and species not in ("", "unassigned", "Unassigned", "s__"):
        species_clean = species.replace("_", " ")
    else:
        species_clean = np.nan

    return pd.Series({"genus": genus, "species": species_clean})

# Apply to the taxonomy column of your merged table
tax_parsed = fungaltrait_input["taxonomy"].apply(extract_genus_species)

# Attach genus & species columns
fungaltrait_input = pd.concat([fungaltrait_input, tax_parsed], axis=1)

print(fungaltrait_input[["ASV_ID", "taxonomy", "genus", "species"]].head())

In [None]:
# only keep ASVs with species-level IDs
fungaltrait_input_species = fungaltrait_input_input.dropna(subset=["species"])

print("Rows before species filter:", fungaltrait_input.shape[0])
print("Rows after species filter :", fungaltrait_inputt_species.shape[0])

# 6. Create mapping table for FungalTraits

In [None]:
# ASV ↔ species mapping
asv_species_map = fungaltrait_input_species[["ASV_ID", "genus", "species"]].drop_duplicates()

asv_species_path = f"{data_dir}/fungaltrait_asv_species.tsv"
asv_species_map.to_csv(asv_species_path, sep="\t", index=False)

print("Saved ASV–species map to:", asv_species_path)
asv_species_map.head()

In [None]:
# Count table (ASVs × samples) with IDs
sample_cols = [c for c in fungaltrait_input_species.columns
               if c not in ["taxonomy", "genus", "species"]]

counts_only = fungaltrait_input_species[sample_cols]

counts_path = f"{data_dir}/fungaltrait_counts.tsv"
counts_only.to_csv(counts_path, sep="\t", index=False)

print("Saved counts table to:", counts_path)
counts_only.head()

# 7. FungalTraits

```
library(readr)
library(dplyr)

1) Load your exported tables
asv_species <- read_tsv("fungaltrait_asv_species.tsv")   # ASV_ID, genus, species
asv_counts  <- read_tsv("fungaltrait_counts.tsv")        # ASV_ID + samples

2) Load FungalTraits database (file name may differ)
fungaltraits <- read_tsv("FungalTraits_1.0.txt")

names(fungaltraits)[names(fungaltraits) == "Species"] <- "species"

3) Attach traits to each ASV
asv_with_traits <- asv_species %>%
  left_join(fungaltraits, by = "species")

4) Combine traits with counts
asv_trait_counts <- asv_with_traits %>%
  left_join(asv_counts, by = "ASV_ID")


relative abundance of trophic modes per sample
sample_cols <- colnames(asv_counts)[colnames(asv_counts) != "ASV_ID"]

trophic_by_sample <- asv_trait_counts %>%
  filter(!is.na(Trophic_mode)) %>%     # or the exact column name in FungalTraits
  group_by(Trophic_mode) %>%
  summarise(across(all_of(sample_cols), sum)) %>%
  ungroup()

Convert to relative abundances per sample 
trophic_rel <- trophic_by_sample
trophic_rel[sample_cols] <- apply(trophic_by_sample[sample_cols], 2, function(x) x / sum(x))
```