In [1]:
import os
import sys
import pathlib
import dendropy
import argparse
import pandas as pd
from dendropy.calculate import treecompare as tc
from dendropy.calculate import treemeasure as tm

In [4]:
def pairwise_tree_stats(tree_dir,  base_tree):
        tree_files = [f for f in os.listdir(tree_dir) if f.endswith(".tre")]
        df = pd.DataFrame()
        btree = base_tree
        tns = dendropy.TaxonNamespace()
        t1 = dendropy.Tree.get(
                file = open(base_tree),
                schema = "newick",
                taxon_namespace = tns
                )


        for file  in tree_files:
                t2_name = str(file)[:-len("_biCons.tre")]
                t2 = dendropy.Tree.get(
                        file = open(base_tree),
                        schema = "newick",
                        taxon_namespace = tns
                        )
                tns,t1,t2 = standardize_namespace(tns, t1, t2)
                t1.encode_bipartitions()
                t2.encode_bipartitions()
                fal_pos_neg = tc.false_positives_and_negatives(t1, t2)

                row_data = {
                        'BaseTree':btree,
                        'genTree':t2_name,
                        'falePosBip':fal_pos_neg[0],
                        'falseNegBip':fal_pos_neg[1],
                        'uRF':tc.symmetric_difference(t1,t2),
                        'wRF':tc.weighted_robinson_foulds_distance(t1,t2),
                        'sackinBase':tm.sackin_index(t1),
                        'sackinGen':tm.sackin_index(t2)
                }
                df = df.append(row_data, ignore_index = True)
        df =df[['BaseTree', 'genTree', 'falePosBip', 'falseNegBip', 'uRF', 'wRF', 'sackinBase', 'sackinGen']]

        return df
    
def standardize_namespace(tns, t1, t2):
    """requires two trees with a common taxon namespace"""
    tree_1_taxa = set()
    tree_2_taxa = set()

    for tip in t1.leaf_node_iter():
        tree_1_taxa.add(tip.taxon)

    for tip in t2.leaf_node_iter():
        tree_2_taxa.add(tip.taxon)

    shared_taxa = tree_1_taxa.intersection(tree_2_taxa)

    assert(len(shared_taxa) >= 1)
    #print("These two trees have {s} shared taxa".format(s=len(shared_taxa)))

    t1.retain_taxa(shared_taxa)
    t2.retain_taxa(shared_taxa)
    return(tns, t1, t2)


In [5]:
pairwise_tree_stats("bestTrees/", 'example.tre')

  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index = True)
  df = df.append(row_data, ignore_index 

Unnamed: 0,BaseTree,genTree,falePosBip,falseNegBip,uRF,wRF,sackinBase,sackinGen
0,example.tre,s23,0,0,0,0.0,5.0,5.0
1,example.tre,s22,0,0,0,0.0,5.0,5.0
2,example.tre,s1,0,0,0,0.0,5.0,5.0
3,example.tre,s11,0,0,0,0.0,5.0,5.0
4,example.tre,s20,0,0,0,0.0,5.0,5.0
5,example.tre,s18,0,0,0,0.0,5.0,5.0
6,example.tre,s4,0,0,0,0.0,5.0,5.0
7,example.tre,s21,0,0,0,0.0,5.0,5.0
8,example.tre,s19,0,0,0,0.0,5.0,5.0
9,example.tre,s9,0,0,0,0.0,5.0,5.0
