In [4]:
from Bio import Phylo
import numpy as np
import pandas as pd

# Input locations, named once so they are easy to retarget.
TREE_NEWICK_PATH = '../../BetaDiversity/Phylogeny_ASV_based/phylo_rpca_results/labeled-phylogeny.nwk'
FEATURE_TABLE_CSV_PATH = '../../cleaned_asv_reads.csv'  # adjust path/format as needed

# Parse the Newick phylogeny into a Bio.Phylo tree object.
tree = Phylo.read(TREE_NEWICK_PATH, 'newick')

# Read the feature table; later cells treat rows as samples and columns as taxa.
# NOTE(review): no index_col is passed here, so if the CSV stores sample IDs in
# a column they will not become the row index -- confirm against the file layout.
feature_table = pd.read_csv(FEATURE_TABLE_CSV_PATH)



In [6]:
# Names of every leaf (terminal clade) of the phylogeny.
taxa_in_tree = [leaf.name for leaf in tree.get_terminals()]

# Column labels of the feature table that also occur among the tree's leaves.
common_taxa = feature_table.columns.intersection(taxa_in_tree)

# Restrict the feature table to those shared taxa; all rows are kept.
feature_table = feature_table.loc[:, common_taxa]


In [None]:
def calculate_faith_pd_for_sample(sample, tree, taxa=None):
    """
    Calculate Faith's PD for a single sample.

    Faith's phylogenetic diversity is the total branch length of the subtree
    that connects every observed taxon to the root. A branch shared by several
    observed taxa is counted once, and internal branches are included, not
    only the terminal branches of the observed tips.

    Parameters
    ----------
    sample : pandas.Series
        Abundances for one sample, indexed by taxon name. A taxon counts as
        observed when its abundance is > 0.
    tree : Bio.Phylo tree (or any object exposing ``root``)
        Only ``tree.root`` and, on each clade, ``clades``, ``name`` and
        ``branch_length`` are read.
    taxa : optional
        Unused; accepted only so existing callers keep working.

    Returns
    -------
    float
        Sum of the branch lengths of every clade that is, or has beneath it,
        an observed tip. Clades whose ``branch_length`` is ``None`` (commonly
        the root) contribute 0. Returns 0.0 when no observed taxon is a tip
        of the tree.
    """
    # Taxa present in this sample; a set gives O(1) membership tests per tip.
    observed_taxa = set(sample[sample > 0].index)

    total_length = 0.0
    # id(clade) -> True when the clade is an observed tip or has one below it.
    has_observed_tip = {}

    # Iterative post-order walk: children are resolved before their parent, and
    # an explicit stack avoids hitting the recursion limit on very deep trees.
    stack = [(tree.root, False)]
    while stack:
        clade, children_resolved = stack.pop()
        children = clade.clades

        if children and not children_resolved:
            # Revisit this clade once all of its children have been resolved.
            stack.append((clade, True))
            stack.extend((child, False) for child in children)
            continue

        if children:
            covered = any(has_observed_tip[id(child)] for child in children)
        else:
            # Only tips are matched by name, so a labelled internal node can
            # never be mistaken for an observed taxon.
            covered = clade.name in observed_taxa
        has_observed_tip[id(clade)] = covered

        # A missing branch length is treated as zero instead of raising.
        if covered and clade.branch_length is not None:
            total_length += clade.branch_length

    return total_length

# Map each row label of the feature table to the Faith's PD computed from that row.
faith_pd_values = {
    sample_name: calculate_faith_pd_for_sample(
        feature_table.loc[sample_name], tree, feature_table
    )
    for sample_name in feature_table.index
}
