# ДЗ №8, Волжина Елена
[Neighbor joining](https://compscicenter.ru/learning/assignments/26347/)


Напишите программу, строящую по данной матрице расстояний между последовательностями филогентическое дерево с помощью алгоритма
![Algorithm](hw08_algo.png)

In [1]:
from collections import defaultdict
from functools import lru_cache

from ete3 import Tree, TreeStyle

In [2]:
class NJTree(object):
    def __init__(self, distances):
        # distances = defaultdict(dict)
        self.distances = distances
        
        for node1 in set(distances):
            for node2 in distances[node1]:
                self.distances[node2][node1] = distances[node1][node2]
        
        self.nodes = set(node for node in distances.keys())
    
    @property
    def N(self):
        return len(self.nodes)

    def calculate_M(self):
        result, N = {}, self.N
        for node in self.nodes:
            dist_sum = sum(self.distances[node][other_node] for other_node in self.nodes - {node})
            result[node] = dist_sum / (self.N - 2)
        return result
    
    def join_nodes(self, a, b, M):
        d_u_a = (self.distances[a][b] + M[a] - M[b]) / 2
        d_u_b = (self.distances[a][b] + M[b] - M[a]) / 2
        
        u = '({}:{}, {}:{})'.format(a, d_u_a, b, d_u_b)
        
        self.nodes -= {a, b}
        for node in self.nodes:
            d_u_x = (self.distances[b][node] + self.distances[a][node] - self.distances[a][b]) / 2
            self.distances[node][u] = self.distances[u][node] = d_u_x
        
        self.nodes.add(u)
    
    def process_NJ_step(self):
        assert self.N > 2, 'Tree must have >= 3 nodes'
        M = self.calculate_M()
        
        # find (a, b) with minimum D(a, b) - M(a) - M(b)
        min_a, min_b, min_value = None, None, None
        for a in self.nodes:
            for b in self.nodes - {a}:
                value = self.distances[a][b] - M[a] - M[b]
                if min_value is None or value < min_value:
                    min_a, min_b, min_value = a, b, value
        
        # join optimal neighbors
        self.join_nodes(min_a, min_b, M)
        
    def process_NJ(self):
        while self.N >= 3:
            self.process_NJ_step()
        
        a, b = self.nodes
        d_a_b = self.distances[a][b]
        return '({}:{}, {}:{});'.format(a, d_a_b / 2, b, d_a_b / 2)

![Example](hw08_example.png)

In [3]:
distances = defaultdict(dict, {
        'B': {'A': 5},
        'C': {'A': 4, 'B': 7},
        'D': {'A': 7, 'B': 10, 'C': 7},
        'E': {'A': 6, 'B': 9, 'C': 6, 'D': 5},
        'F': {'A': 8, 'B': 11, 'C': 8, 'D': 9, 'E': 8}
    })

nj_tree = NJTree(distances)
newick = nj_tree.process_NJ()
print(newick)

(((E:2.0, D:3.0):1.0, F:5.0):0.5, ((A:1.0, B:4.0):1.0, C:2.0):0.5);


In [4]:
tree = Tree(newick)

tree_style = TreeStyle()
tree_style.show_leaf_name = True
tree_style.show_branch_length = True


tree.img_style["size"] = 0   # hide 'root'
tree.show(tree_style=tree_style)