In [1]:
import numpy as np

# Выравнивание двух последовательностей с помощью HMM

In [2]:
def print_m(m, title):
    """Print a 2-D table of numbers under a title.

    Args:
        m: Iterable of rows; each row is an iterable of numbers.
        title: Text printed on the first line, followed by a dashed rule.

    Every value is rendered with two decimals and followed by a single
    space, so each printed row ends with a trailing space.
    """
    print(title)
    print("----------------------")
    for values in m:
        # Build the whole line first, then emit it with one print call.
        line = "".join("{:.2f}".format(value) + " " for value in values)
        print(line)

In [3]:
def alignment_HMM(seq1, seq2, delta1, delta2, p1, p2, eps1, eps2, tao, match, mismatch, gap):
    """Print posterior Match / Insertion / Deletion tables for two sequences.

    Builds the start vector and the 3x3 transition matrix of a three-state
    model (state 0 = match, state 1 = insertion, state 2 = deletion, in the
    order the tables are printed), hands them to ``FB_2dim`` and prints the
    three returned tables with ``print_m``.

    Args:
        seq1: First sequence (indexable, e.g. a string).
        seq2: Second sequence.
        delta1: Transition weight match -> state 1; also the start weight
            of state 1.
        delta2: Transition weight match -> state 2; also the start weight
            of state 2.
        p1: Transition weight state 2 -> state 1.
        p2: Transition weight state 1 -> state 2.
        eps1: Transition weight state 1 -> state 1.
        eps2: Transition weight state 2 -> state 2.
        tao: Amount subtracted from every row's transition into the match
            state, so each row sums to ``1 - tao``; also forwarded to
            ``FB_2dim`` (presumably the termination probability -- confirm).
        match: Emission weight of the match state when the symbols agree.
        mismatch: Emission weight of the match state when they differ.
        gap: Emission weight used for states 1 and 2.

    Returns:
        None. The three tables are written to standard output.
    """
    # Row k holds the transitions leaving state k; column k those entering it.
    trans_prob = np.asarray([[1 - delta1 - delta2 - tao, delta1, delta2],
                             [1 - eps1 - p2 - tao, eps1, p2],
                             [1 - eps2 - p1 - tao, p1, eps2]])
    # The start distribution does not reserve any mass for `tao`.
    start_prob = np.asarray([1 - delta1 - delta2, delta1, delta2])

    M, I, D = FB_2dim(seq1, seq2, start_prob, trans_prob, match, mismatch, gap, tao)
    print_m(M, "Match")
    print()
    print_m(I, "Insertion")
    print()
    print_m(D, "Deletion")

In [4]:
def FB_2dim(s1, s2, start_prob, trans_prob, match, mismatch, gap, tao):
    """Forward-backward posteriors for a three-state pairwise alignment model.

    State 0 advances in both sequences and emits ``match`` or ``mismatch``
    depending on whether the two current symbols are equal. State 1 advances
    in ``s1`` only and state 2 advances in ``s2`` only; both emit ``gap``.
    Cell ``(i, j)`` of every table refers to having consumed ``s1[:i]`` and
    ``s2[:j]``.

    Args:
        s1: First sequence; must be non-empty (``s1[0]`` is read).
        s2: Second sequence; must be non-empty (``s2[0]`` is read).
        start_prob: Length-3 start weights, indexed by state.
        trans_prob: 3x3 transition weights, ``trans_prob[k, l]`` being the
            weight of moving from state ``k`` to state ``l``.
        match: Emission weight of state 0 for equal symbols.
        mismatch: Emission weight of state 0 for different symbols.
        gap: Emission weight of states 1 and 2.
        tao: Weight applied once at the final cell (used as the end factor
            of both the forward total and the backward initialisation).

    Returns:
        Tuple ``(M, I, D)`` of arrays with shape ``(len(s1) + 1, len(s2) + 1)``
        holding ``alpha * beta / total`` for states 0, 1 and 2 respectively.
    """
    trans_prob = np.asarray(trans_prob)
    n, m = len(s1) + 1, len(s2) + 1

    def pair_emission(i, j):
        # Emission weight of state 0 for the symbol pair s1[i], s2[j].
        return match if s1[i] == s2[j] else mismatch

    # ---- forward pass -------------------------------------------------
    alpha = np.zeros((3, n, m))
    alpha[0, 1, 1] = start_prob[0] * pair_emission(0, 0)
    # The first gap symbol is emitted like any other one, so it carries the
    # `gap` factor too; without it, alignments that open with a gap would be
    # over-weighted by 1/gap whenever gap != 1.
    alpha[1, 1, 0] = start_prob[1] * gap
    alpha[2, 0, 1] = start_prob[2] * gap
    # Along the borders only one gap state can be extended.
    for t1 in range(2, n):
        alpha[1, t1, 0] = gap * (alpha[1, t1 - 1, 0] * trans_prob[1, 1])
    for t2 in range(2, m):
        alpha[2, 0, t2] = gap * (alpha[2, 0, t2 - 1] * trans_prob[2, 2])

    for t1 in range(1, n):
        for t2 in range(1, m):
            # Cell (1, 1) of state 0 is a start cell and was set above.
            if (t1, t2) != (1, 1):
                alpha[0, t1, t2] = pair_emission(t1 - 1, t2 - 1) * np.dot(
                    alpha[:, t1 - 1, t2 - 1], trans_prob[:, 0])
            alpha[1, t1, t2] = gap * np.dot(alpha[:, t1 - 1, t2], trans_prob[:, 1])
            alpha[2, t1, t2] = gap * np.dot(alpha[:, t1, t2 - 1], trans_prob[:, 2])

    final_prob = alpha[:, n - 1, m - 1].sum() * tao

    # ---- backward pass ------------------------------------------------
    beta = np.zeros((3, n, m))
    beta[:, n - 1, m - 1] = tao

    # Last column / last row: the only possible continuation is a gap step.
    for t1 in range(n - 2, -1, -1):
        beta[:, t1, m - 1] = beta[1, t1 + 1, m - 1] * trans_prob[:, 1] * gap
    for t2 in range(m - 2, -1, -1):
        beta[:, n - 1, t2] = beta[2, n - 1, t2 + 1] * trans_prob[:, 2] * gap

    for t1 in range(n - 2, -1, -1):
        for t2 in range(m - 2, -1, -1):
            # Weight of each possible next step, already including the
            # emission made by the state that is entered.
            next_step = np.asarray([beta[0, t1 + 1, t2 + 1] * pair_emission(t1, t2),
                                    beta[1, t1 + 1, t2] * gap,
                                    beta[2, t1, t2 + 1] * gap])
            # Row k of trans_prob dotted with next_step gives beta for state k.
            beta[:, t1, t2] = np.dot(trans_prob, next_step)

    # ---- posterior ----------------------------------------------------
    posterior = alpha * beta / final_prob

    return posterior[0, :, :], posterior[1, :, :], posterior[2, :, :]

# Тесты 1-2

In [5]:
def test1():
    """Run the alignment printout for the pair "AGAGA" / "AGAGAGA"."""
    alignment_HMM(
        seq1="AGAGA",
        seq2="AGAGAGA",
        delta1=0.1,
        delta2=0.1,
        p1=0.1,
        p2=0.1,
        eps1=0.3,
        eps2=0.3,
        tao=0.1,
        match=0.9,
        mismatch=0.1,
        gap=1,
    )

In [6]:
def test2():
    """Run the alignment printout for "ATAGCTACGAC" / "TGCTAGCTAGC"."""
    alignment_HMM(
        seq1="ATAGCTACGAC",
        seq2="TGCTAGCTAGC",
        delta1=0.1,
        delta2=0.1,
        p1=0.1,
        p2=0.1,
        eps1=0.3,
        eps2=0.3,
        tao=0.1,
        match=0.9,
        mismatch=0.1,
        gap=1,
    )

In [7]:
# Run the first example; the Match / Insertion / Deletion tables are printed below.
test1()

Match
----------------------
0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 0.83 0.00 0.12 0.00 0.00 0.00 0.00 
0.00 0.00 0.66 0.01 0.27 0.00 0.00 0.00 
0.00 0.00 0.00 0.50 0.01 0.43 0.00 0.00 
0.00 0.00 0.00 0.00 0.35 0.01 0.58 0.00 
0.00 0.00 0.00 0.00 0.00 0.19 0.01 0.74 

Insertion
----------------------
0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.02 0.01 0.00 0.01 0.00 0.00 0.00 0.00 
0.00 0.02 0.01 0.01 0.01 0.00 0.00 0.00 
0.00 0.00 0.02 0.01 0.01 0.01 0.00 0.00 
0.00 0.00 0.00 0.01 0.01 0.01 0.02 0.01 
0.00 0.00 0.00 0.00 0.01 0.01 0.01 0.03 

Deletion
----------------------
0.00 0.15 0.14 0.01 0.00 0.00 0.00 0.00 
0.00 0.01 0.17 0.16 0.01 0.00 0.00 0.00 
0.00 0.00 0.02 0.17 0.17 0.02 0.00 0.00 
0.00 0.00 0.00 0.02 0.17 0.17 0.02 0.00 
0.00 0.00 0.00 0.00 0.01 0.17 0.17 0.02 
0.00 0.00 0.00 0.00 0.00 0.02 0.21 0.23 


In [8]:
# Run the second example; the Match / Insertion / Deletion tables are printed below.
test2()

Match
----------------------
0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 0.29 0.05 0.16 0.01 0.01 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 0.16 0.04 0.02 0.54 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 0.04 0.02 0.01 0.01 0.67 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 0.00 0.18 0.00 0.00 0.00 0.71 0.00 0.00 0.00 0.00 0.00 
0.00 0.00 0.00 0.21 0.00 0.00 0.00 0.71 0.00 0.00 0.00 0.00 
0.00 0.00 0.00 0.00 0.20 0.00 0.00 0.00 0.68 0.00 0.00 0.00 
0.00 0.00 0.00 0.00 0.00 0.17 0.01 0.00 0.01 0.56 0.01 0.00 
0.00 0.00 0.00 0.00 0.00 0.01 0.03 0.10 0.01 0.04 0.11 0.04 
0.00 0.00 0.00 0.00 0.00 0.00 0.04 0.01 0.07 0.03 0.32 0.02 
0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.01 0.14 0.14 0.05 
0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.02 0.73 

Insertion
----------------------
0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.32 0.05 0.03 0.07 0.01 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.08 0.08 0.02 0.01 0.03 0.01 0.00 0.00 0.00 0.00 0.00 0.00 
0.02 0.15 0.02 0.01 0.