# ДЗ №9, Волжина Елена
[(WU)PGMA](https://compscicenter.ru/learning/assignments/26343/)
![Algorithm](hw09_algo.png)

In [1]:
from collections import defaultdict

from ete3 import Tree, TreeStyle

In [2]:
class PGMTree(object):
    """Agglomerative (W/U)PGMA clustering that produces a Newick string.

    Clusters are identified by their own Newick sub-expression, so the
    identifier of the last remaining cluster is the finished tree.

    Attributes:
        dist: symmetric nested mapping ``dist[a][b]`` of pairwise distances.
        nodes: set of identifiers of the clusters that are still unmerged.
        is_weighted: the flag passed to the constructor (see ``__init__``).
        cluster_size: weight of each cluster in the distance average.
        inner_cluster_dist: height of each cluster above its leaves
            (0 for a leaf, half the merge distance for a joined cluster).
    """

    def __init__(self, distances, is_weighted=True):
        """Store a symmetric copy of ``distances`` and register the leaves.

        Args:
            distances: mapping ``{node: {other_node: distance}}``. Each pair
                needs to be listed in one direction only; the mirrored entry
                is filled in here.
            is_weighted: if True every merged cluster keeps weight 1, so a
                merge averages the two child distances equally (WPGMA);
                if False a merged cluster weighs as much as both children
                together (UPGMA).
        """
        self.dist = defaultdict(dict)

        for node1 in distances:
            for node2 in distances[node1]:
                value = distances[node1][node2]
                self.dist[node1][node2] = value
                self.dist[node2][node1] = value

        self.nodes = set(self.dist)
        self.is_weighted = is_weighted
        self.cluster_size = {node: 1 for node in self.nodes}
        self.inner_cluster_dist = {node: 0 for node in self.nodes}

    @property
    def N(self):
        """Number of clusters that have not been merged yet."""
        return len(self.nodes)

    def join_nodes(self, a, b):
        """Merge clusters ``a`` and ``b`` into a new cluster.

        The new cluster replaces ``a`` and ``b`` in ``self.nodes``; its
        distances to all remaining clusters, its weight and its height are
        recorded. Entries of ``a`` and ``b`` in the lookup tables are kept.
        """
        d_a_b = self.dist[a][b]
        height = d_a_b / 2
        # Branch length = height of the new cluster minus the child's height.
        d_u_a = height - self.inner_cluster_dist[a]
        d_u_b = height - self.inner_cluster_dist[b]
        u = '({}:{}, {}:{})'.format(a, d_u_a, b, d_u_b)

        self.nodes -= {a, b}
        w_a, w_b = self.cluster_size[a], self.cluster_size[b]
        for node in self.nodes:
            d = (w_a * self.dist[a][node] + w_b * self.dist[b][node]) / (w_a + w_b)
            self.dist[node][u] = self.dist[u][node] = d

        self.nodes.add(u)
        # A constant weight of 1 makes the average above an unweighted mean
        # of the two children; accumulated sizes weight it by cluster size.
        self.cluster_size[u] = 1 if self.is_weighted else (w_a + w_b)
        self.inner_cluster_dist[u] = height

    def process_PGM_step(self):
        """Find the closest pair of clusters and merge it.

        Clusters are scanned in a stable order (sorted by their string
        form) so that the child order inside the Newick string and the
        winner of a distance tie do not depend on set iteration order,
        which varies between interpreter runs for strings.
        """
        assert self.N >= 2, 'Tree must have >= 2 nodes'

        ordered = sorted(self.nodes, key=str)

        # dist is symmetric by construction, so each unordered pair is
        # examined once; the first pair with the minimum distance wins.
        best = None
        for i, a in enumerate(ordered):
            for b in ordered[i + 1:]:
                d = self.dist[a][b]
                if best is None or d < best[0]:
                    best = (d, a, b)

        _, min_a, min_b = best
        self.join_nodes(min_a, min_b)

    def process_PGM(self):
        """Merge clusters until one is left and return it as Newick text.

        Returns:
            The identifier of the final cluster followed by ``';'``.

        Raises:
            ValueError: if no nodes were derived from the distance mapping.
        """
        if not self.nodes:
            raise ValueError('Cannot build a tree: the distance mapping contains no node pairs')

        while self.N >= 2:
            self.process_PGM_step()

        root, = self.nodes
        return '{};'.format(root)
        

![Weighted](hw09_example_weighted.png "Weighted")          ![Unweighted](hw09_example_unweighted.png "Unweighted")

In [3]:
# Pairwise distances between the example taxa K, L, M and N; every pair is
# listed once, under the alphabetically smaller name.
distances = defaultdict(dict)
distances['K'] = {'L': 16, 'M': 16, 'N': 10}
distances['L'] = {'M': 8, 'N': 8}
distances['M'] = {'N': 4}

# Rendering options shared by both tree drawings below: show leaf names
# and branch lengths.
tree_style = TreeStyle()
tree_style.show_leaf_name = tree_style.show_branch_length = True

In [4]:
# Build the tree with is_weighted=True and keep its Newick text.
pgm_tree = PGMTree(distances, is_weighted=True)
w_newick = pgm_tree.process_PGM()

# Print the Newick text, then draw the tree with the shared style.
print("Weighted {}".format(w_newick))
Tree(w_newick).show(tree_style=tree_style)

Weighted ((L:4.0, (N:2.0, M:2.0):2.0):3.25, K:7.25);


In [5]:
# Build the tree with is_weighted=False and keep its Newick text.
pgm_tree = PGMTree(distances, is_weighted=False)
uw_newick = pgm_tree.process_PGM()

# Print the Newick text, then draw the tree with the shared style.
print("Unweighted {}".format(uw_newick))
Tree(uw_newick).show(tree_style=tree_style)

Unweighted ((L:4.0, (N:2.0, M:2.0):2.0):3.0, K:7.0);
