In [None]:
import pandas as pd
from dendropy import Tree

This is a simple script to extract the estimated substitution rates found along branches that descend from multiple MRCAs.

First, we load the tree.

In [None]:
# Read the annotated tree from the NEXUS file. Underscores in taxon labels are
# kept as-is so the labels can be matched against the metadata `taxa` column.
tree = Tree.get(
    path="../../beast-analyses/2025-05-28_constant_relaxed.mcc.tree",
    schema="nexus",
    preserve_underscores=True,
)

# Labels of every taxon in the tree's namespace.
taxa = [taxon.label for taxon in tree.taxon_namespace]

Next, we load the metadata. We will be assessing how substitution rate is associated with lineage so we extract the `te` column specifically.

In [None]:
# Read both supplemental tables, keeping only the columns used downstream, and
# tag every row with whether it came from the first file (`workshop`).
md_frames = []
for file in ["supplemental_data1.csv", "supplemental_data2.csv"]:
    df = pd.read_csv( "../../data/" + file, usecols=["taxa", "te", "included_analysis"] )
    df["workshop"] = (file == "supplemental_data1.csv")
    md_frames.append( df )

# Combine the tables, restrict to taxa present in the tree, and drop rows that
# are exact duplicates across all columns.
md = pd.concat( md_frames )
md = md.loc[md["taxa"].isin(taxa)]
md = md.drop_duplicates()

# A bare length comparison can pass when a duplicated taxon masks a missing
# one (e.g. a taxon listed in both files keeps two rows because `workshop`
# differs), so check the one-to-one mapping explicitly as well.
assert md["taxa"].is_unique, "a taxon appears in more than one metadata row"
assert set( md["taxa"] ) == set( taxa ), "metadata does not cover every taxon in the tree"
assert len( md ) == len( taxa )
md.head()

Lastly, we iterate through the third wave 7PET lineages, identify their MRCA, and collect all of the substitution rates on branches that descend from that MRCA.

In [None]:
# Third-wave 7PET lineages whose rates are collected.
lineages = ["T9", "T10", "T11", "T12", "T13", "T15"]

# One list per output column; entry k of every list describes the same node.
lineage_col = []
internal_col = []
rate_col = []
length_col = []

for lineage in lineages:
    # Tips assigned to this lineage in the metadata `te` column.
    representatives = md.loc[md["te"] == lineage, "taxa"].to_list()
    # NOTE: a manual two-tip override for T13 is kept here, disabled.
    #if lineage == "T13":
    #    representatives = ["Africa|KEN|KEN-2007-008|T13|2007-01-01", "Asia|YEM|ERR2269621|T13|2017-01-01"]
    lineage_mrca = tree.mrca( taxon_labels=representatives )
    print( f"{lineage}: {lineage_mrca.distance_from_tip()}")

    # Post-order walk starting at the MRCA; the MRCA node itself is visited too.
    for node in lineage_mrca.postorder_iter():
        lineage_col.append( lineage )
        internal_col.append( node.is_internal() )
        # Use the `rate_median` annotation, falling back to `rate` when the
        # former reads back as an empty string.
        rate = node.annotations["rate_median"].value
        if rate == "":
            rate = node.annotations["rate"].value
        rate_col.append( float( rate ) )
        length_col.append( node.edge_length )

# Assemble the table once, with the same column names and order as before.
results = pd.DataFrame(
    {
        "lineage" : lineage_col,
        "internal" : internal_col,
        "median_rate" : rate_col,
        "branch_length" : length_col,
    }
)
results.to_csv( "lineage_rates.csv", index=False )
results.head()