In [1]:
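# Uncomment to install dendropy into the kernel's environment (%pip targets the running kernel, unlike !pip).
# %pip install dendropy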

In [32]:
from methods import read_tree, get_tree2_distances, noisify_distances, baseline_method, get_matches, get_scores

# STEP 1: read a tree in Newick format; read_tree returns its leaf-to-leaf distances.
# Alternative inputs:
# path = 'trees/test_tree.txt'
# path = 'trees/sym_tree_T1.txt'
path = 'trees/big_tree.txt'
distances1 = read_tree(path)

# STEP 2: derive a second tree (T2) by renaming some leaves and adding noise to
# the distances (original note: each distance is multiplied by a random number
# between .9 and 1.1).
#
# A hand-written mapping can be used instead of the sampled one below, e.g.
#   changes = {'B': 'X', 'D': 'R', 'F': 'W', 'H': 'Z', 'E': 'Y'}
# (presumably meant for the small test trees -- TODO confirm). It used to be
# assigned here and then immediately overwritten, so it is kept only as an example.
SEED = 0               # fixes which leaves get renamed, so a re-run picks the same ones
n_common_leaves = 60   # leaves that keep the same name in both trees
n_diff_leaves = len(distances1) - n_common_leaves  # leaves that receive a new name in T2
if n_diff_leaves < 0:
    # Fail early with a readable message instead of an error from inside sample().
    raise ValueError(
        f"n_common_leaves={n_common_leaves} exceeds the {len(distances1)} leaves read from {path!r}"
    )

# Sample (without replacement) the leaves to rename and give each a fresh label '0', '1', ...
renamed_leaves = (
    distances1.index.to_series()
    .sample(n_diff_leaves, replace=False, random_state=SEED)
    .to_list()
)
changes = {leaf: str(i) for i, leaf in enumerate(renamed_leaves)}

distances2 = get_tree2_distances(distances1, changes)
# NOTE(review): noisify_distances presumably draws its own random numbers; SEED above
# does not control that. Seed it separately if full reproducibility is needed.
distances2 = noisify_distances(distances2, .1)

# STEP 3: rank potential matches according to a method.

# The baseline method pairs leaves of T1 with leaves of T2 based on how similar
# their distances to all common leaves are.
matches = baseline_method(distances1, distances2, epsilon=0)

# STEP 4: find the best match in T2 for each leaf of T1, and conversely.
t1_matches = get_matches(matches, level=0, changes=changes)
t2_matches = get_matches(matches, level=1, changes=changes)

# STEP 5: compute a score: fraction of correct pairs (between 0 and 1).
scores = get_scores(t1_matches, t2_matches)
scores

ratio: 0.9547138244847005


{'t1_score': 0.9444444444444444,
 't2_score': 0.8181818181818182,
 'mean_score': 0.8813131313131313}

In [28]:
# Inspect the pairwise leaf-to-leaf distance matrix that read_tree loaded from `path`.
distances1

Unnamed: 0,10020.ENSDORP00000002346,10090.ENSMUSP00000018805,10116.ENSRNOP00000003804,10141.ENSCPOP00000003239,10141.ENSCPOP00000018381,10228.TriadP54105,121225.PHUM413450-PA,12957.ACEP 00009457-PA,13037.EHJ66433,132908.ENSPVAP00000006353,...,9669.ENSMPUP00000014182,9685.ENSFCAP00000004377,9739.ENSTTRP00000004916,9785.ENSLAFP00000023898,9796.ENSECAP00000006507,9813.ENSPCAP00000009886,9823.ENSSSCP00000018275,9913.ENSBTAP00000007999,9986.ENSOCUP00000010215,99883.ENSTNIP00000008530
10020.ENSDORP00000002346,0.000000,0.280100,0.280496,0.353467,0.328666,3.011485,4.220502,4.197901,4.446018,0.290968,...,0.420094,0.333849,0.328739,0.347662,0.282681,0.372704,0.300666,0.359690,0.533749,1.184953
10090.ENSMUSP00000018805,0.280100,0.000000,0.065632,0.301540,0.276738,2.980610,4.189628,4.167026,4.415143,0.260093,...,0.389219,0.302975,0.297865,0.316787,0.251807,0.341830,0.269791,0.328816,0.502874,1.154078
10116.ENSRNOP00000003804,0.280496,0.065632,0.000000,0.301935,0.277134,2.981006,4.190023,4.167422,4.415539,0.260489,...,0.389615,0.303370,0.298260,0.317183,0.252202,0.342225,0.270187,0.329211,0.503270,1.154474
10141.ENSCPOP00000003239,0.353467,0.301540,0.301935,0.000000,0.087254,3.053978,4.262995,4.240394,4.488510,0.333461,...,0.462587,0.376342,0.371232,0.390155,0.325174,0.415197,0.343158,0.402183,0.576242,1.227446
10141.ENSCPOP00000018381,0.328666,0.276738,0.277134,0.087254,0.000000,3.029176,4.238194,4.215592,4.463709,0.308659,...,0.437785,0.351541,0.346431,0.365353,0.300373,0.390396,0.318357,0.377382,0.551440,1.202644
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9813.ENSPCAP00000009886,0.372704,0.341830,0.342225,0.415197,0.390396,2.944477,4.153495,4.130893,4.379010,0.260071,...,0.389197,0.302953,0.297843,0.135048,0.251785,0.000000,0.269769,0.328794,0.553544,1.117945
9823.ENSSSCP00000018275,0.300666,0.269791,0.270187,0.343158,0.318357,2.908550,4.117567,4.094966,4.343082,0.160406,...,0.289532,0.203287,0.127492,0.244727,0.152119,0.269769,0.000000,0.158443,0.481506,1.082018
9913.ENSBTAP00000007999,0.359690,0.328816,0.329211,0.402183,0.377382,2.967575,4.176592,4.153990,4.402107,0.219431,...,0.348557,0.262312,0.155757,0.303751,0.211144,0.328794,0.158443,0.000000,0.540530,1.141043
9986.ENSOCUP00000010215,0.533749,0.502874,0.503270,0.576242,0.551440,3.192325,4.401342,4.378741,4.626858,0.471808,...,0.600934,0.514690,0.509580,0.528502,0.463522,0.553544,0.481506,0.540530,0.000000,1.365793


In [None]:
# Modify the tree to change the name of one node
# design a prototype method to pair nodes
# test prototype on modified tree