# ДЗ № 14, Волжина Лена

Реализуйте алгоритм Forward-Backward для выравнивания двух последовательностей. [Задание](https://compscicenter.ru/learning/assignments/27582/)

In [1]:
from collections import defaultdict
from itertools import product


![FB_alignment](hw14_algo.png)

In [2]:
# Transition probabilities of the pair HMM, read by FBAligner:
#   d - opening a gap (M -> X or M -> Y)
#   e - extending a gap (X -> X or Y -> Y)
#   t - leaving any state for the end state; it also seeds the backward
#       table and closes the total sequence probability
d, e, t = 0.1, 0.3, 0.2

In [18]:
# Alphabet of the aligned sequences.
chars = 'ATGC'

# Emission weight of an aligned pair: 0.9 for identical characters,
# 0.1 for a mismatch.
match = {
    pair: (0.9 if pair[0] == pair[1] else 0.1)
    for pair in product(chars, repeat=2)
}

# Emission weight of a single character placed against a gap.
gaps = dict.fromkeys(chars, 1)

# Sequences aligned by the demo cells below.
s1 = 'AGAGA'
s2 = 'AGAGAGAC'

In [16]:
class FBAligner(object):
    """Forward-Backward posterior aligner for a three-state pair HMM.

    States:
        'M' - emits an aligned pair (one character from each sequence);
        'X' - emits a character of the first sequence against a gap;
        'Y' - emits a character of the second sequence against a gap.

    Allowed transitions are M->M, M->X, M->Y, X->X, X->M, Y->Y, Y->M and
    any state -> end. Direct X<->Y transitions are not part of the model.

    Args:
        match: mapping ``(c1, c2) -> weight`` of emitting the aligned pair.
        gap: mapping ``c -> weight`` of emitting ``c`` against a gap.
        delta: gap-open probability (M->X and M->Y).
        epsilon: gap-extend probability (X->X and Y->Y).
        tau: probability of moving to the end state.

    ``delta``, ``epsilon`` and ``tau`` are optional. When one is left as
    ``None`` the module-level ``d``, ``e`` or ``t`` is used instead, looked
    up at call time, so code that only passes ``match`` and ``gap`` keeps
    working as before.
    """

    def __init__(self, match, gap, delta=None, epsilon=None, tau=None):
        self.match = match
        self.gap = gap
        self.states = ['M', 'X', 'Y']
        self.delta = delta
        self.epsilon = epsilon
        self.tau = tau

    def _params(self):
        """Return ``(delta, epsilon, tau)``, falling back to globals d, e, t."""
        return (
            d if self.delta is None else self.delta,
            e if self.epsilon is None else self.epsilon,
            t if self.tau is None else self.tau,
        )

    def calculate_alphas(self, s1, s2):
        """Fill the forward table.

        Returns:
            A mapping ``(i, j) -> {'X': .., 'Y': .., 'M': ..}`` where each
            value is the total weight of all state paths that emit
            ``s1[:i]`` and ``s2[:j]`` and end in that state. Besides the
            cells ``0 <= i <= len(s1)``, ``0 <= j <= len(s2)`` the mapping
            holds all-zero sentinel cells at ``(i, -1)`` and ``(-1, j)``.

        Raises:
            KeyError: if ``match`` or ``gap`` lacks an entry for a
                character that occurs in the sequences.
        """
        delta, epsilon, tau = self._params()
        n, m = len(s1), len(s2)
        from_match = 1 - 2 * delta - tau   # M -> M
        from_gap = 1 - epsilon - tau       # X -> M and Y -> M

        alphas = defaultdict(dict)
        # The start behaves like an M state that has emitted nothing yet.
        alphas[(0, 0)] = {'M': 1, 'X': 0, 'Y': 0}
        # Zero sentinels just outside the table keep the recurrences uniform.
        for i in range(n + 1):
            alphas[(i, -1)] = {state: 0 for state in self.states}
        for j in range(m + 1):
            alphas[(-1, j)] = {state: 0 for state in self.states}

        # Row-major order: (i-1, j), (i, j-1) and (i-1, j-1) are always
        # filled before (i, j).
        for i in range(n + 1):
            for j in range(m + 1):
                if i == 0 and j == 0:
                    continue
                above = alphas[(i - 1, j)]
                left = alphas[(i, j - 1)]
                # Gap emissions; on the border there is no character to
                # emit and the sentinel makes the term vanish anyway.
                q_x = self.gap[s1[i - 1]] if i > 0 else 0
                q_y = self.gap[s2[j - 1]] if j > 0 else 0

                cell = {
                    'X': q_x * (delta * above['M'] + epsilon * above['X']),
                    'Y': q_y * (delta * left['M'] + epsilon * left['Y']),
                    'M': 0,
                }
                if i > 0 and j > 0:
                    diag = alphas[(i - 1, j - 1)]
                    pij = self.match[(s1[i - 1], s2[j - 1])]
                    cell['M'] = pij * (from_match * diag['M'] +
                                       from_gap * (diag['X'] + diag['Y']))
                alphas[(i, j)] = cell

        return alphas

    def calculate_betas(self, s1, s2):
        """Fill the backward table.

        Returns:
            A mapping ``(i, j) -> {'M': .., 'X': .., 'Y': ..}`` where each
            value is the total weight of emitting ``s1[i:]`` and ``s2[j:]``
            and reaching the end, given the current state. Besides the
            cells ``0 <= i <= len(s1)``, ``0 <= j <= len(s2)`` the mapping
            holds all-zero sentinel cells at ``(i, len(s2) + 1)`` and
            ``(len(s1) + 1, j)``.

        Raises:
            KeyError: if ``match`` or ``gap`` lacks an entry for a
                character that occurs in the sequences.
        """
        delta, epsilon, tau = self._params()
        n, m = len(s1), len(s2)
        to_match_from_m = 1 - 2 * delta - tau   # M -> M
        to_match_from_gap = 1 - epsilon - tau   # X -> M and Y -> M

        betas = defaultdict(dict)
        # With everything emitted, the only move left is to the end state.
        betas[(n, m)] = {state: tau for state in self.states}
        for i in range(n + 1):
            betas[(i, m + 1)] = {state: 0 for state in self.states}
        for j in range(m + 1):
            betas[(n + 1, j)] = {state: 0 for state in self.states}

        # Reverse row-major order: (i+1, j), (i, j+1) and (i+1, j+1) are
        # always filled before (i, j).
        for i in range(n, -1, -1):
            for j in range(m, -1, -1):
                if i == n and j == m:
                    continue
                # Weight of continuing with a gap state that emits the
                # next character of s1 (X) or of s2 (Y).
                q_x = self.gap[s1[i]] if i < n else 0
                q_y = self.gap[s2[j]] if j < m else 0
                via_x = q_x * betas[(i + 1, j)]['X']
                via_y = q_y * betas[(i, j + 1)]['Y']

                if i < n and j < m:
                    pij = self.match[(s1[i], s2[j])]   # next pair
                    next_m = betas[(i + 1, j + 1)]['M']
                    m_to_m = to_match_from_m * pij * next_m
                    gap_to_m = to_match_from_gap * pij * next_m
                else:
                    # One sequence is exhausted: no further pair can be
                    # emitted.
                    m_to_m = gap_to_m = 0

                betas[(i, j)] = {
                    'M': m_to_m + delta * (via_x + via_y),
                    'X': gap_to_m + epsilon * via_x,
                    'Y': gap_to_m + epsilon * via_y,
                }
        return betas

    def process(self, s1, s2):
        """Return the posterior match matrix of two sequences.

        Returns:
            A list of ``len(s1)`` rows, each with ``len(s2)`` floats;
            entry ``[i][j]`` is the posterior probability that ``s1[i]``
            is aligned with ``s2[j]``.

        Raises:
            KeyError: if ``match`` or ``gap`` lacks an entry for a
                character that occurs in the sequences.
            ZeroDivisionError: if the sequences have zero total weight
                under the model.
        """
        tau = self._params()[2]
        n, m = len(s1), len(s2)
        alphas = self.calculate_alphas(s1, s2)
        betas = self.calculate_betas(s1, s2)

        # Total weight of the sequence pair: finish in any state at
        # (n, m), then move to the end state.
        p_sum_fwd = sum(alphas[(n, m)].values()) * tau
        return [
            [alphas[(i, j)]['M'] * betas[(i, j)]['M'] / p_sum_fwd
             for j in range(1, m + 1)]
            for i in range(1, n + 1)
        ]

In [19]:
# Build the aligner; keep the forward and backward tables around so that
# they can be inspected separately.
aligner = FBAligner(match, gaps)
alphas = aligner.calculate_alphas(s1, s2)
betas = aligner.calculate_betas(s1, s2)
res = aligner.process(s1, s2)

# Posterior match matrix: columns are labelled with the characters of s2,
# rows with the characters of s1.
print(' ' * 8 + (' ' * 9).join(s2))
for row_char, row in zip(s1, res):
    cells = ['{:.4f}'.format(v) for v in row]
    print(row_char, end='    ')
    print('    '.join(cells))

        A         G         A         G         A         G         A         C
A    0.8657    0.0050    0.1143    0.0008    0.0053    0.0000    0.0001    0.0000
G    0.0018    0.7379    0.0053    0.2266    0.0011    0.0102    0.0001    0.0001
A    0.0067    0.0017    0.6170    0.0054    0.3362    0.0017    0.0133    0.0008
G    0.0001    0.0095    0.0017    0.5005    0.0065    0.4445    0.0081    0.0109
A    0.0001    0.0001    0.0084    0.0023    0.3615    0.0143    0.5147    0.0869


In [6]:
# Print the forward table, one matrix per state (border cells are skipped).
for state in ['X', 'M', 'Y']:
    print('state', state)
    print(' ' * 8 + (' ' * 9).join(s2))
    for i, row_char in enumerate(s1, start=1):
        cells = ['{:.4f}'.format(alphas[(i, j)][state])
                 for j in range(1, len(s2) + 1)]
        print(row_char, end='    ')
        print('    '.join(cells))
              

state X
        A         G         A         G         A
A    0.0000    0.0000    0.0000    0.0000    0.0000
G    0.0540    0.0005    0.0013    0.0000    0.0001
A    0.0167    0.0293    0.0007    0.0015    0.0001
state M
        A         G         A         G         A
A    0.5400    0.0050    0.0135    0.0004    0.0012
G    0.0050    0.2916    0.0030    0.0148    0.0003
A    0.0135    0.0030    0.1579    0.0017    0.0121
state Y
        A         G         A         G         A
A    0.0000    0.0540    0.0167    0.0064    0.0020
G    0.0000    0.0005    0.0293    0.0091    0.0042
A    0.0000    0.0013    0.0007    0.0160    0.0050


In [7]:
# Print the backward table, one matrix per state (border cells are skipped).
for state in ['X', 'M', 'Y']:
    print('state', state)
    print(' ' * 8 + (' ' * 9).join(s2))
    for i, row_char in enumerate(s1, start=1):
        cells = ['{:.4f}'.format(betas[(i, j)][state])
                 for j in range(1, len(s2) + 1)]
        print(row_char, end='    ')
        print('    '.join(cells))
              

state X
        A         G         A         G         A
A    0.0027    0.0013    0.0489    0.0280    0.0180
G    0.0001    0.0027    0.0010    0.0900    0.0600
A    0.0000    0.0000    0.0000    0.0000    0.2000
state M
        A         G         A         G         A
A    0.0048    0.0058    0.0585    0.0102    0.0060
G    0.0012    0.0060    0.0102    0.1080    0.0200
A    0.0005    0.0018    0.0060    0.0200    0.2000
state Y
        A         G         A         G         A
A    0.0073    0.0152    0.0489    0.0010    0.0000
G    0.0034    0.0111    0.0280    0.0900    0.0000
A    0.0016    0.0054    0.0180    0.0600    0.2000
