In [None]:
# Ch10-5 Playing Recursively with Trees [Updated to use RAxML-NG]

# TODO: remove the step numbers from the cell comments, or make them match up

In [None]:
# 23.  Load the raxml-generated tree for Ebola viruses
import dendropy

# Tree file written by RAxML-NG in the previous recipe.
tree_file = "ebola_tree.raxml.bestTreeCollapsed"

# RAxML-NG writes its trees in Newick format, so parse the file with that schema.
ebola_raxml = dendropy.Tree.get(path=tree_file, schema="newick")

# Optional sanity check: echo the parsed tree back out as a Newick string.
print(ebola_raxml.as_string(schema="newick"))


In [None]:
# 24. Compute the level of each node
def compute_level(node, level=0):
    """Walk the subtree under ``node`` and report the depth of every labelled node.

    Children are visited before the node itself. For each node that carries a
    taxon, one line is printed with the taxon, the depth reported by
    ``node.level()`` and the depth counted by this recursion, so the two
    values can be compared.

    Args:
        node: Node to start from; must provide ``child_nodes()``, ``level()``
            and a ``taxon`` attribute.
        level: Depth assigned to ``node`` by the caller (0 for the start node).
    """
    child_level = level + 1
    for descendant in node.child_nodes():
        compute_level(descendant, child_level)

    taxon = node.taxon
    if taxon is None:
        # Unlabelled (internal) nodes are traversed but not reported.
        return
    print("%s: %d %d" % (taxon, node.level(), level))
compute_level(ebola_raxml.seed_node) 

In [None]:
# 25.  Compute the height of each node
def compute_height(node):
    """Return the height of ``node`` and print the height of every node below it.

    A node without children has height 0; any other node is one more than
    its tallest child. Children are processed first, so lines are printed
    bottom-up. Each line shows the node's taxon (or ``Internal`` when the
    taxon is falsy), its height, and the depth reported by ``node.level()``.

    Args:
        node: Node to measure; must provide ``child_nodes()``, ``level()``
            and a ``taxon`` attribute.

    Returns:
        The height of ``node`` as an int.
    """
    kids = node.child_nodes()
    if kids:
        height = 1 + max(compute_height(kid) for kid in kids)
    else:
        height = 0

    label = node.taxon or 'Internal'
    print("%s: %d %d" % (label, height, node.level()))
    return height
compute_height(ebola_raxml.seed_node) 

In [None]:
# 26.  Compute the number of offspring for each node
def compute_nofs(node):
    """Print the number of direct children (offspring) of every node under ``node``.

    Children are processed before the node itself. Each printed line shows
    the node's taxon (or ``Internal`` when the taxon is falsy), its number of
    direct children, and the depth reported by ``node.level()``.

    Args:
        node: Node to start from; must provide ``child_nodes()``, ``level()``
            and a ``taxon`` attribute.
    """
    children = node.child_nodes()
    nofs = len(children)
    # An explicit loop is required: in Python 3 ``map`` returns a lazy
    # iterator, so an unconsumed ``map(...)`` call never recurses and only
    # the starting node would be reported.
    for child in children:
        compute_nofs(child)
    desc = node.taxon or 'Internal'
    print("%s: %d %d" % (desc, nofs, node.level()))
compute_nofs(ebola_raxml.seed_node) 

In [None]:
# 27.  Print out the leaves
def print_nodes(node):
    """Print every labelled node in the subtree under ``node``, children first.

    The traversal is depth-first: all children of a node are handled before
    the node itself. Nodes whose ``taxon`` is ``None`` are traversed but
    produce no output. Each printed line shows the taxon followed by the
    depth reported by ``node.level()`` in parentheses.

    Args:
        node: Node to start from; must provide ``child_nodes()``, ``level()``
            and a ``taxon`` attribute.
    """
    for descendant in node.child_nodes():
        print_nodes(descendant)

    taxon = node.taxon
    if taxon is None:
        return
    print('%s (%d)' % (taxon, node.level()))
print_nodes(ebola_raxml.seed_node) 

In [None]:
# 28.  Print leaf nodes in breadth-first manner
from collections import deque 
def print_breadth(tree):
    """Print the labelled nodes of ``tree`` in breadth-first order.

    Starting at ``tree.seed_node``, nodes are taken from a FIFO queue. A node
    with a taxon is printed as ``taxon (level)`` and is not expanded further;
    a node without a taxon contributes its children to the back of the queue.

    Args:
        tree: Object exposing a ``seed_node`` whose nodes provide
            ``child_nodes()``, ``level()`` and a ``taxon`` attribute.
    """
    pending = deque([tree.seed_node])
    while pending:
        current = pending.popleft()
        if current.taxon is None:
            # Unlabelled node: schedule its children and move on.
            pending.extend(current.child_nodes())
            continue
        print('%s (%d)' % (current.taxon, current.level()))
print_breadth(ebola_raxml) 

In [None]:
# 29.  Getting back to the real dataset
from copy import deepcopy 
simple_ebola = deepcopy(ebola_raxml) 
def simplify_tree(node):
    """Collapse each clade under ``node`` whose leaves share one prefix, in place.

    Every leaf label is split on single spaces and reduced to a prefix: the
    first token, or ``'EBOV'`` joined with the second token when the first
    token is ``'EBOV'``. If all leaves below ``node`` reduce to the same
    prefix, the prefix set and the leaf count are printed, ``node`` receives
    a new taxon labelled with that prefix, and its children are removed.
    Otherwise each child of ``node`` is simplified recursively.

    Args:
        node: Node to simplify; it is modified in place. Must provide
            ``leaf_nodes()``, ``child_nodes()``, ``set_child_nodes()`` and a
            writable ``taxon`` attribute; leaves need a ``taxon.label`` string.
    """
    clade_leaves = node.leaf_nodes()
    label_tokens = (leaf.taxon.label.split(' ') for leaf in clade_leaves)
    lineages = {
        ('EBOV' + tokens[1]) if tokens[0] == 'EBOV' else tokens[0]
        for tokens in label_tokens
    }

    if len(lineages) != 1:
        # Mixed clade: keep this node and look for uniform clades further down.
        for child in node.child_nodes():
            simplify_tree(child)
        return

    print(lineages, len(clade_leaves))
    (lineage,) = lineages
    node.taxon = dendropy.Taxon(label=lineage)
    node.set_child_nodes([])
# Collapse the deep-copied tree in place, starting from its seed (root) node.
simplify_tree(simple_ebola.seed_node) 
# Reorder the simplified tree with DendroPy's ladderize() before exporting it.
simple_ebola.ladderize() 
# Save the simplified tree to 'ebola_simple.nex' using the 'nexus' schema.
simple_ebola.write_to_path('ebola_simple.nex', 'nexus') 

In [None]:
## End of Notebook ##