In [1]:
from malala import * 

### Generating trees

In [2]:
# Smallest case: with two leaf labels the recorded output contains a single tree.
enumerate_labelled_trees([1,2])

[[1, 2]]

In [3]:
# Three leaf labels: the recorded output lists 3 trees, each written as nested pairs.
enumerate_labelled_trees([1,2,3])

[[[1, 3], 2], [1, [2, 3]], [[1, 2], 3]]

In [4]:
# Four leaf labels: the recorded output lists 15 trees, so the count grows quickly.
enumerate_labelled_trees([1,2,3,4])

[[[1, 3], [2, 4]],
 [[[1, 3], 2], 4],
 [[[1, 4], 3], 2],
 [[1, [3, 4]], 2],
 [[[1, 3], 4], 2],
 [[1, 4], [2, 3]],
 [[1, [2, 3]], 4],
 [1, [[2, 4], 3]],
 [1, [2, [3, 4]]],
 [1, [[2, 3], 4]],
 [[1, 2], [3, 4]],
 [[[1, 2], 3], 4],
 [[[1, 4], 2], 3],
 [[1, [2, 4]], 3],
 [[[1, 2], 4], 3]]

### Counting the number of changes using the Sankoff algorithm

In [5]:
# Substitution cost matrix, one row per "from" symbol and one column per "to" symbol.
# With the alphabet order A, C, G, T used in the next cell, the cheaper cost (1)
# falls on A<->G and C<->T, and every other substitution costs 2.5.
cost_matrix = np.array([
    [0,   2.5, 1,   2.5],
    [2.5, 0,   2.5, 1],
    [1,   2.5, 0,   2.5],
    [2.5, 1,   2.5, 0],
])
print(cost_matrix)

[[0.  2.5 1.  2.5]
 [2.5 0.  2.5 1. ]
 [1.  2.5 0.  2.5]
 [2.5 1.  2.5 0. ]]


In [6]:
# Example phylogeny over five numbered leaves, written as nested pairs.
tree = [[1, 2], [3, [4, 5]]]
alphabet = ['A', 'C', 'G', 'T']
# One observed character per leaf (presumably leaf k takes entry k-1 -- confirm in malala).
observedCharacters = ['C', 'A', 'C', 'A', 'G']

# Compute the parsimony score for the tree above; Sankoff returns the score
# together with the cost vector at the root.
s, v = Sankoff(tree, alphabet, observedCharacters, cost_matrix)
print(
    'The parsimony score of this phylogeny is',
    s,
    '. \nThe vector cost in the root of this is',
    v,
    ".\n",
)

The parsimony score of this phylogeny is 6.0 . 
The vector cost in the root of this is [6. 6. 7. 8.] .



### Genome generator

In [7]:
# Root genome: five sites, all 'a'.
initialGenome = ['a'] * 5
# Template tree: each node is [time] (leaf) or [time, left, right] (internal node).
templateTree = [1.0, [5.0, [1.0], [5.0]], [0.05, [0.1], [0.1]]]
# NOTE(review): the simulation is random and no seed is set here, so re-running
# this cell will generally give a different tree than the recorded output.
simulatedTree = generateDriver(initialGenome, templateTree)

print("Tree with edges weighted by molecular time.")
print(templateTree)
print("Simulated tree")
print(simulatedTree)

Tree with edges weighted by molecular time.
[1.0, [5.0, [1.0], [5.0]], [0.05, [0.1], [0.1]]]
Simulated tree
[['c', 'g', 'g', 'a', 'c'], [['c', 'a', 'g', 't', 'c'], [['c', 'a', 'c', 'g', 'c']], [['t', 'c', 'c', 'g', 'g']]], [['c', 'g', 'g', 'a', 'c'], [['c', 'g', 'g', 'a', 'c']], [['c', 'g', 'g', 'c', 'c']]]]


In [8]:
# Show the simulated tree again; the recorded output matches the print in the previous cell.
print(simulatedTree)

[['c', 'g', 'g', 'a', 'c'], [['c', 'a', 'g', 't', 'c'], [['c', 'a', 'c', 'g', 'c']], [['t', 'c', 'c', 'g', 'g']]], [['c', 'g', 'g', 'a', 'c'], [['c', 'g', 'g', 'a', 'c']], [['c', 'g', 'g', 'c', 'c']]]]


In [9]:
def convertTree(tree):
    """Reduce a simulated tree to its leaf genomes, keeping the branching shape.

    A node is either ``[genome]`` (a leaf) or ``[genome, left, right]``
    (an internal node). Genomes stored on internal nodes are dropped.

    Returns the genome itself for a leaf, otherwise a two-element list
    holding the converted left and right subtrees.
    """
    is_leaf = len(tree) == 1
    if is_leaf:
        return tree[0]
    # Internal node: ignore tree[0] and recurse into both children.
    return [convertTree(tree[1]), convertTree(tree[2])]
       
# Keep only the leaf genomes, nested in the same shape as the simulated tree.
convertTree(simulatedTree)
    

[[['c', 'a', 'c', 'g', 'c'], ['t', 'c', 'c', 'g', 'g']],
 [['c', 'g', 'g', 'a', 'c'], ['c', 'g', 'g', 'c', 'c']]]

Two rules define a template tree recursively:
    (time) is a template tree
    (time, tree1, tree2) is a template tree if tree1 and tree2 are template trees
Every template tree can be generated by applying the above two rules a finite number of times.

In [10]:

def extract_genomes(tree):
    """Collect the leaf genomes of a simulated tree into a flat list.

    A node is either ``[genome]`` (a leaf) or ``[genome, left, right]``
    (an internal node). Leaves are gathered left to right; genomes stored
    on internal nodes are skipped.

    Returns a list of genomes. For a leaf, the one-element node list itself
    is returned.
    """
    if len(tree) != 1:
        left_leaves = extract_genomes(tree[1])
        right_leaves = extract_genomes(tree[2])
        return left_leaves + right_leaves
    # A leaf node is already a one-element list of genomes.
    return tree

# Flatten the simulated tree into its leaf genomes, ordered left to right.
data=extract_genomes(simulatedTree)
data

[['c', 'a', 'c', 'g', 'c'],
 ['t', 'c', 'c', 'g', 'g'],
 ['c', 'g', 'g', 'a', 'c'],
 ['c', 'g', 'g', 'c', 'c']]

### Example: finding the most parsimonious tree

In [11]:
# Unit cost matrix: every substitution costs 1, staying the same costs 0.
cost = np.array([
    [0, 1, 1, 1],
    [1, 0, 1, 1],
    [1, 1, 0, 1],
    [1, 1, 1, 0],
])
# Candidate topologies: every labelled tree over the four simulated leaves.
tree_list = enumerate_labelled_trees([1, 2, 3, 4])
# Lower-case alphabet, matching the symbols in the simulated genomes.
alphabet_lc = ['a', 'c', 'g', 't']
parsimonious_Sank(tree_list, data, alphabet_lc, cost)

([[1, 2], [3, 4]], 7.0)

### Exercise

Try to find the most parsimonious trees for the following sequences.

In [12]:
mouse='ACCAAAAAAACATCCAAACACCAACCCCAGCCCTTACGCAATAGCCATACAAAGAATATTATACTACTAAAAACTCAAATTAACTCTTTAATCTTTATACAACATTCCACCAACCTATCCACACAAAAAAACTCATATTTATCTAAATACGAACTTCACACAACCTTAACACATAAACATACCCCAGCCCAACACCCTTCCACAAATCCTTAATATACGCACCATAAATAAC'
m = list(mouse)  # one list entry per character of the sequence
bovine='ACCAAACCTGTCCCCACCATCTAACACCAACCCACATATACAAGCTAAACCAAAAATACCATACAACCATAAATAAGACTAATCTATTAAAATAACCCATTACGATACAAAATCCCTTTCGTCTAGATACAAACCACAACACACAATTAATACACACCACAATTACAATACTAAACTCCCATCCCACCAAATCACCCTCCATCAAATCCACAAATTACACAACCATTAACCC'
b = list(bovine)  # one list entry per character of the sequence
gibbon='ACTATACCCACCCAACTCGACCTACACCAATCCCCACATAGCACACAGACCAACAACCTCCCACCTTCCATACCAAGCCCCGACTTTACCGCCAACGCACCTCATCAAAACATACCTACAACACAAACAAATGCCCCCCCACCCTCCTTCTTCAAGCCCACTAGACCATCCTACCTTCCTAGCACGCCAAGCTCTCTACCATCAAACGCACAACTTACACATACAGAACCAC'
g = list(gibbon)  # one list entry per character of the sequence
orang='ACCCCACCCGTCTACACCAGCCAACACCAACCCCCACCTACTATACCAACCAATAACCTCTCAACCCCTAAACCAAACACTATCCCCAAAACCAACACACTCTACCAAAATACACCCCCAATTCACATCCGCACACCCCCACCCCCCCTGCCCACGTCCATCCCATCACCCTCTCCTCCCAACACCCTAAGCCACCTTCCTCAAAATCCAAAACCCACACAACCGAAACAAC'
o = list(orang)  # one list entry per character of the sequence
gorilla='ACCCCATTTATCCATAAAAACCAACACCAACCCCCATCTAACACACAAACTAATGACCCCCCACCCTCAAAGCCAAACACCAACCCTATAATCAATACGCCTTATCAAAACACACCCCCAACATAAACCCACGCACCCCCACCCCTTCCGCCCATGCTCACCACATCATCTCTCCCCTTCAACACCTCAATCCACCTCCCCCCAAATACACAATTCACACAAACAATACCAC'
go = list(gorilla)  # one list entry per character of the sequence
chimp='ACCCCATCCACCCATACAAACCAACATTACCCTCCATCCAATATACAAACTAACAACCTCCCACTCTTCAGACCGAACACCAATCTCACAACCAACACGCCCCGTCAAAACACCCCTTCAGCACAAATTCATACACCCCTACCTTTCCTACCCACGTTCACCACATCATCCCCCCCTCTCAACATCTTGACTCGCCTCTCTCCAAACACACAATTCACGCAAACAACGCCAC'
ch = list(chimp)  # one list entry per character of the sequence
human='ACCCCACTCACCCATACAAACCAACACCACTCTCCACCTAATATACAAATTAATAACCTCCCACCTTCAGAACTGAACGCCAATCTCATAACCAACACACCCCATCAAAGCACCCCTCCAACACAAACCCGCACACCTCCACCCCCCTCGTCTACGCTTACCACGTCATCCCTCCCTCTCAACACCTTAACTCACCTTCTCCCAAACGCACAATTCGCACACACAACGCCAC'
h = list(human)  # one list entry per character of the sequence

In [13]:
# List order is mouse, bovine, gibbon, orang, gorilla, chimp, human,
# matching the 1..7 numbering given in the exercise text.
primates_and_friends=[m,b,g,o,go,ch,h]

Step 1 - Generate all possible trees for these 7 species using the enumerate_labelled_trees() function

1: mouse, 2: bovine, 3: gibbon, 4: orang, 5: gorilla, 6: chimp, 7: human

Step 2 - Find the most parsimonious trees using the parsimonious_Sank() function