In [1]:
import dendropy

In [2]:
ebola_raxml = dendropy.Tree.get_from_path('my_ebola.nex', 'nexus')

In [3]:
def compute_level(node, level=0):
    for child in node.child_nodes():
        compute_level(child, level + 1)
    if node.taxon is not None:
        print("%s: %d %d" % (node.taxon, node.level(), level))

compute_level(ebola_raxml.seed_node)

'BDBV_KC545395': 2 2
'BDBV_KC545396': 3 3
'EBOV_2007_KC242788': 11 11
'EBOV_2007_KC242787': 11 11
'EBOV_2007_KC242786': 12 12
'EBOV_2007_KC242789': 12 12
'EBOV_2007_KC242784': 11 11
'EBOV_2007_KC242785': 10 10
'EBOV_2007_KC242790': 10 10
'EBOV_1995_KC242799': 10 10
'EBOV_1995_KC242796': 10 10
'EBOV_1976_KC242801': 10 10
'EBOV_1976_AF272001': 10 10
'EBOV_2014_KM034549': 9 9
'EBOV_2014_KM034550': 10 10
'EBOV_2014_KM034555': 11 11
'EBOV_2014_KM034560': 16 16
'EBOV_2014_KM034553': 18 18
'EBOV_2014_KM034552': 18 18
'EBOV_2014_KM034556': 18 18
'EBOV_2014_KM034557': 18 18
'EBOV_2014_KM034551': 16 16
'EBOV_2014_KM034558': 16 16
'EBOV_2014_KM034562': 14 14
'EBOV_2014_KM034554': 13 13
'EBOV_2014_KM034559': 13 13
'EBOV_2014_KM034561': 13 13
'EBOV_2014_KM034563': 8 8
'SUDV_EU338380': 9 9
'SUDV_KC242783': 10 10
'SUDV_FJ968794': 10 10
'SUDV_KC589025': 9 9
'SUDV_AY729654': 10 10
'SUDV_JN638998': 10 10
'RESTV_FJ621584': 8 8
'RESTV_FJ621585': 10 10
'RESTV_JX477166': 11 11
'RESTV_AB050936': 11 11
'RESTV

In [4]:
def compute_height(node):
    children = node.child_nodes()
    if len(children) == 0:
        height = 0
    else:
        height = 1 + max(map(lambda x: compute_height(x), children))
    desc = node.taxon or 'Internal'
    print("%s: %d %d" % (desc, height, node.level()))
    return height

compute_height(ebola_raxml.seed_node)

'BDBV_KC545395': 0 2
'BDBV_KC545396': 0 3
'EBOV_2007_KC242788': 0 11
'EBOV_2007_KC242787': 0 11
Internal: 1 10
'EBOV_2007_KC242786': 0 12
'EBOV_2007_KC242789': 0 12
Internal: 1 11
'EBOV_2007_KC242784': 0 11
Internal: 2 10
Internal: 3 9
'EBOV_2007_KC242785': 0 10
'EBOV_2007_KC242790': 0 10
Internal: 1 9
Internal: 4 8
'EBOV_1995_KC242799': 0 10
'EBOV_1995_KC242796': 0 10
Internal: 1 9
'EBOV_1976_KC242801': 0 10
'EBOV_1976_AF272001': 0 10
Internal: 1 9
Internal: 2 8
Internal: 5 7
'EBOV_2014_KM034549': 0 9
'EBOV_2014_KM034550': 0 10
'EBOV_2014_KM034555': 0 11
'EBOV_2014_KM034560': 0 16
'EBOV_2014_KM034553': 0 18
'EBOV_2014_KM034552': 0 18
Internal: 1 17
'EBOV_2014_KM034556': 0 18
'EBOV_2014_KM034557': 0 18
Internal: 1 17
Internal: 2 16
Internal: 3 15
'EBOV_2014_KM034551': 0 16
'EBOV_2014_KM034558': 0 16
Internal: 1 15
Internal: 4 14
'EBOV_2014_KM034562': 0 14
Internal: 5 13
'EBOV_2014_KM034554': 0 13
Internal: 6 12
'EBOV_2014_KM034559': 0 13
'EBOV_2014_KM034561': 0 13
Internal: 1 12
Intern

18

In [5]:
def compute_nofs(node):
    children = node.child_nodes()
    nofs = len(children)
    map(lambda x: compute_nofs(x), children)
    desc = node.taxon or 'Internal'
    print("%s: %d %d" % (desc, nofs, node.level()))

compute_nofs(ebola_raxml.seed_node)

Internal: 3 0


In [6]:
def print_nodes(node):
    for child in node.child_nodes():
        print_nodes(child)
    if node.taxon is not None:
        print('%s (%d)' % (node.taxon, node.level()))

print_nodes(ebola_raxml.seed_node)

'BDBV_KC545395' (2)
'BDBV_KC545396' (3)
'EBOV_2007_KC242788' (11)
'EBOV_2007_KC242787' (11)
'EBOV_2007_KC242786' (12)
'EBOV_2007_KC242789' (12)
'EBOV_2007_KC242784' (11)
'EBOV_2007_KC242785' (10)
'EBOV_2007_KC242790' (10)
'EBOV_1995_KC242799' (10)
'EBOV_1995_KC242796' (10)
'EBOV_1976_KC242801' (10)
'EBOV_1976_AF272001' (10)
'EBOV_2014_KM034549' (9)
'EBOV_2014_KM034550' (10)
'EBOV_2014_KM034555' (11)
'EBOV_2014_KM034560' (16)
'EBOV_2014_KM034553' (18)
'EBOV_2014_KM034552' (18)
'EBOV_2014_KM034556' (18)
'EBOV_2014_KM034557' (18)
'EBOV_2014_KM034551' (16)
'EBOV_2014_KM034558' (16)
'EBOV_2014_KM034562' (14)
'EBOV_2014_KM034554' (13)
'EBOV_2014_KM034559' (13)
'EBOV_2014_KM034561' (13)
'EBOV_2014_KM034563' (8)
'SUDV_EU338380' (9)
'SUDV_KC242783' (10)
'SUDV_FJ968794' (10)
'SUDV_KC589025' (9)
'SUDV_AY729654' (10)
'SUDV_JN638998' (10)
'RESTV_FJ621584' (8)
'RESTV_FJ621585' (10)
'RESTV_JX477166' (11)
'RESTV_AB050936' (11)
'RESTV_JX477165' (10)
'RESTV_FJ621583' (10)
'TAFV_FJ217162' (5)
'BDBV_FJ217

In [7]:
from collections import deque

def print_breadth(tree):
    queue = deque()
    queue.append(tree.seed_node)
    while len(queue) > 0:
        process_node = queue.popleft()
        if process_node.taxon is not None:
            print('%s (%d)' % (process_node.taxon, process_node.level()))
        else:
            for child in process_node.child_nodes():
                queue.append(child)

print_breadth(ebola_raxml)

'BDBV_KC545394' (1)
'BDBV_KC545393' (1)
'BDBV_KC545395' (2)
'BDBV_KC545396' (3)
'BDBV_FJ217161' (4)
'TAFV_FJ217162' (5)
'EBOV_2014_KM034563' (8)
'RESTV_FJ621584' (8)
'EBOV_2014_KM034549' (9)
'SUDV_EU338380' (9)
'SUDV_KC589025' (9)
'EBOV_2007_KC242785' (10)
'EBOV_2007_KC242790' (10)
'EBOV_1995_KC242799' (10)
'EBOV_1995_KC242796' (10)
'EBOV_1976_KC242801' (10)
'EBOV_1976_AF272001' (10)
'EBOV_2014_KM034550' (10)
'SUDV_KC242783' (10)
'SUDV_FJ968794' (10)
'SUDV_AY729654' (10)
'SUDV_JN638998' (10)
'RESTV_FJ621585' (10)
'RESTV_JX477165' (10)
'RESTV_FJ621583' (10)
'EBOV_2007_KC242788' (11)
'EBOV_2007_KC242787' (11)
'EBOV_2007_KC242784' (11)
'EBOV_2014_KM034555' (11)
'RESTV_JX477166' (11)
'RESTV_AB050936' (11)
'EBOV_2007_KC242786' (12)
'EBOV_2007_KC242789' (12)
'EBOV_2014_KM034554' (13)
'EBOV_2014_KM034559' (13)
'EBOV_2014_KM034561' (13)
'EBOV_2014_KM034562' (14)
'EBOV_2014_KM034560' (16)
'EBOV_2014_KM034551' (16)
'EBOV_2014_KM034558' (16)
'EBOV_2014_KM034553' (18)
'EBOV_2014_KM034552' (18)
'EB

In [8]:
from copy import deepcopy
simple_ebola = deepcopy(ebola_raxml)

def simplify_tree(node):
    prefs = set()
    for leaf in node.leaf_nodes():
        my_toks = leaf.taxon.label.split(' ')[0].split('_')
        if my_toks[0] == 'EBOV':
            prefs.add('EBOV' + my_toks[1])
        else:
            prefs.add(my_toks[0])
    if len(prefs) == 1:
        print(prefs, len(node.leaf_nodes()))
        node.taxon = dendropy.Taxon(label=list(prefs)[0])
        #node.collapse_clade()
        node.set_child_nodes([])
    else:
        for child in node.child_nodes():
            simplify_tree(child)

simplify_tree(simple_ebola.seed_node)
simple_ebola.ladderize()
simple_ebola.write_to_path('ebola_simple.nex', 'nexus')

{'BDBV'} 1
{'BDBV'} 1
{'EBOV2007'} 7
{'EBOV1995'} 2
{'EBOV1976'} 2
{'EBOV2014'} 15
{'SUDV'} 6
{'RESTV'} 6
{'TAFV'} 1
{'BDBV'} 1
{'BDBV'} 1
{'BDBV'} 1
