In [1]:
from ete3 import Tree

In [2]:
# Set the name of the tree file to test on here; the test cells below
# load it from the './../data/testdata/' directory.
fileName = '4Leaf.tree'

In [3]:
def getLeafNodeNames(tree):
    '''
    Collect the names of all leaf nodes of `tree`.

    The tree is walked with traverse("postorder"); every node whose
    is_leaf() is true contributes its `name`. The names are returned
    as a new list sorted in ascending order.
    '''
    return sorted(
        candidate.name
        for candidate in tree.traverse("postorder")
        if candidate.is_leaf()
    )

In [4]:
def getTopologyMatrix(newickFile):
    '''Take a newick file and return the leaf-to-leaf topology matrix.

    The file is read, parsed into a Tree, and its leaf names are obtained
    (sorted) from getLeafNodeNames. Rows and columns both follow that
    order. Each entry is tree.get_distance(..., topology_only=True)
    converted to int (the number of nodes between the two leaves); an
    entry whose row and column names are equal is 0.

    Returns a list of lists of ints.
    '''
    with open(newickFile, 'r') as handle:
        newickText = handle.read()
    tree = Tree(newickText)

    names = getLeafNodeNames(tree)

    def nodesBetween(first, second):
        # Equal names are never looked up in the tree; they are 0 by definition.
        if first == second:
            return 0
        return int(tree.get_distance(first, second, topology_only=True))

    return [
        [nodesBetween(rowName, columnName) for columnName in names]
        for rowName in names
    ]

In [5]:
def getDistanceMatrix(newickFile):
    '''Take a newick file and return the leaf-to-leaf distance matrix.

    The file is read, parsed into a Tree, and its leaf names are obtained
    (sorted) from getLeafNodeNames. Rows and columns both follow that
    order. Each entry is the value of tree.get_distance for the row and
    column leaf names; an entry whose row and column names are equal is
    the integer 0.

    Returns a list of lists.
    '''
    with open(newickFile, 'r') as handle:
        newickText = handle.read()
    tree = Tree(newickText)

    names = getLeafNodeNames(tree)

    matrix = []
    for rowName in names:
        # Every pair is queried separately in both directions; only
        # identical names short-circuit to 0 without touching the tree.
        row = [
            0 if rowName == columnName else tree.get_distance(rowName, columnName)
            for columnName in names
        ]
        matrix.append(row)
    return matrix

In [6]:
# Test getTopologyMatrix: display the topology matrix computed for the
# test tree file selected by fileName.
getTopologyMatrix('./../data/testdata/'+fileName)

[[0, 1, 2, 2], [1, 0, 2, 2], [2, 2, 0, 1], [2, 2, 1, 0]]

In [7]:
# Test getDistanceMatrix: display the distance matrix computed for the
# test tree file selected by fileName.
getDistanceMatrix('./../data/testdata/'+fileName)

[[0, 0.361399, 0.39885499999999996, 0.497874],
 [0.361399, 0, 0.464316, 0.563335],
 [0.39885499999999996, 0.464316, 0, 0.430593],
 [0.497874, 0.5633349999999999, 0.430593, 0]]

In [8]:
# Keep the topology matrix of the test tree as input for the flattening test.
testMatrix = getTopologyMatrix('./../data/testdata/'+fileName)

In [9]:
def flattenSymetricalMatrix(matrix):
    ''' Flatten a 2d symmetrical matrix (list of lists) into a single list.

        Only the entries strictly below the diagonal are kept, read row
        by row from top to bottom and left to right within each row.
        for example:
           input [[0,2,3],
                  [2,0,4],
                  [3,4,0]]
           output [2,3,4]
                  '''
    belowDiagonal = []
    # Row 0 has nothing left of the diagonal, so start at row 1.
    for rowIndex in range(1, len(matrix)):
        belowDiagonal.extend(matrix[rowIndex][:rowIndex])
    return belowDiagonal
        
# Test: flatten the topology matrix held in testMatrix and print the
# resulting list.
flattenedList = flattenSymetricalMatrix(testMatrix)
print(flattenedList)

[1, 2, 2, 2, 2, 1]
