# ДЗ № 14, Волжина Лена

Реализуйте алгоритм Forward-Backward для выравнивания двух последовательностей. [Задание](https://compscicenter.ru/learning/assignments/27582/)

In [1]:
from collections import defaultdict
from itertools import product


![FB_alignment](hw14_algo.png)

In [2]:
d = d1 = d2 = 1 / 3  # can start from any point
r1 = r2 = 0  # no different gaps in row
t = 0.1  # has it some meaning?
e = e1 = e2 = 1 / 2

In [3]:
transitions = {
    'M': {'X': d1, 'Y': d2, 'M': 1 - d1 - d2 - t, 'E': t},
    'X': {'X': e1, 'Y': r2, 'M': 1 - e1 - r2 - t, 'E': t},
    'Y': {'X': r1, 'Y': e2, 'M': 1 - e2 - r1 - t, 'E': t},
    'E': {'X': 0,  'Y': 0,  'M': 0, 'E': 0},
}
pi = {'X': d1, 'Y': d2, 'M': 1 - d1 - d2}

chars = 'ATGC'
alpha = 1/8      # probability of some mismatch
pam = {(c1, c2): 1 - alpha * (len(chars) - 1) if c1 == c2 else alpha   # match or mismatch
       for c1 in chars for c2 in chars}
gaps = {c: 1 / len(chars) for c in chars}

s1, s2 = 'AGA', 'AGAGA'

In [4]:
class FBAligner(object):
    def __init__(self, transition, match, gap):
        self.transition = transition
        self.match = match
        self.gap = gap
        self.states = ['M', 'X', 'Y']

    def calculate_alphas(self, s1, s2):
        alphas = defaultdict(dict)
        n, m = len(s1), len(s2)
        
        # init alphas
        alphas[(0, 0)]['M'] = 1
        alphas[(0, 0)]['X'] =  alphas[(0, 0)]['Y'] = 0
        for i in range(n): 
            alphas[(i, -1)] = {state: 0 for state in self.states}
        for j in range(m): 
            alphas[(-1, j)] = {state: 0 for state in self.states}
        
        # calculate alphas
        for i, j in product(range(n), range(m)):
            if i == j == 0:
                continue

            c1, c2 = s1[i], s2[j]
            pij = self.match[(c1, c2)]
            qi, qj = self.gap[c1], self.gap[c2]
            
            alphas[(i, j)] = {
                'M': pij * ((1 - 2 * d - t) * alphas[(i - 1, j - 1)]['M'] + 
                            (1 - e - t) * (alphas[(i - 1, j - 1)]['X'] + 
                                           alphas[(i - 1, j - 1)]['Y'])),
                'X': qi * (d * alphas[(i - 1, j)]['M'] + e * alphas[(i - 1, j)]['X']),
                'Y': qj * (d * alphas[(i, j - 1)]['M'] + e * alphas[(i, j - 1)]['Y'])
            }
        
        return alphas
       
    def calculate_betas(self, s1, s2):
        betas = defaultdict(dict)
        n, m = len(s1), len(s2)
        
        # init betas
        betas[(n - 1, m - 1)] = {state: t for state in self.states}
        for i in range(n): 
            betas[(i, m)] = {state: 0 for state in self.states}
        for j in range(m): 
            betas[(n, j)] = {state: 0 for state in self.states}
            
        # calculate betas
        for i, j in reversed(list(product(range(n), range(m)))):
            if i == n - 1 and j == m - 1:
                continue
            
            get_or_None = lambda xs, idx: xs[idx] if len(xs) > idx else None
            c1, c2 = get_or_None(s1, i), get_or_None(s2, j)
            pij = self.match.get((c1, c2), 100500)
            qi, qj = self.gap.get(c1, 3030), self.gap.get(c2, 4040)
            
            if pij * qi * qj > 1:
                print(i, j, pij, qi, qj)
            betas[(i, j)] = {
                'M': ((1 - 2 * d - t) * pij * betas[(i + 1, j + 1)]['M'] +
                      d * (qi * betas[(i + 1, j)]['X'] + qj * betas[(i, j + 1)]['Y'])),
                'X': ((1 - e - t) * pij * betas[(i + 1, j + 1)]['M'] +
                      e * qi * betas[(i + 1, j)]['X']),
                'Y': ((1 - e - t) * pij * betas[(i + 1, j + 1)]['M'] +
                      e * qj * betas[(i, j + 1)]['Y']),
            }
            
        return betas
        
    def process(self, s1, s2):
        n, m = len(s1), len(s2)
        alphas = self.calculate_alphas(s1, s2)
        betas = self.calculate_betas(s1, s2)
        
        # calculate alignment probabilities
        p_sum_fwd = sum(alphas[(n - 1, m - 1)].values()) * t
        p_sum_bwd = 0
        c1, c2 = s1[0], s2[0]
        pij = self.match[(c1, c2)]
        qi, qj = self.gap[c1], self.gap[c2]
        for state in self.states:
            p_open = pij if state == 'M' else (qi if state == 'X' else qj)
            p_sum_bwd += betas[(0, 0)][state] * (1 / 3) * p_open
        
        print(p_sum_fwd, p_sum_bwd)
        
        result = []
        for i in range(n):
            row = [round(alphas[(i, j)]['M'] * betas[(i, j)]['M'] / p_sum_fwd, 4)
                   for j in range(m)]
            result.append(row)
            print(row)
        
        return result

In [5]:
aligner = FBAligner(transitions, pam, gaps)
res = aligner.process(s1, s2)

0.0001139503761574074 4.8461160542052467e-05
[1.0, 0.0, 0.0, 0.0, 0.0]
[0.0, 0.5721, 0.0851, 0.3333, 0.0048]
[0.0, 0.0048, 0.1944, 0.0533, 0.7427]


In [6]:
for i in range(len(s1)):
    for j in range(len(s2))

SyntaxError: invalid syntax (<ipython-input-6-a001561d9898>, line 2)

In [None]:
print('      ' + '   '.join(s2))
for i in range(len(s1)):
    print(s1[i], end='  ')
    for j in range(len(s2)):
        print(alphas)
        