In [1]:
import numpy as np

# Выравнивание двух последовательностей с помощью HMM

In [2]:
def print_m(m, title):
    """Print a titled table with every entry formatted to two decimals.

    Parameters
    ----------
    m : iterable of iterables of numbers
        Rows of the table; each row is written on its own line.
    title : str
        Heading printed above a dashed separator line.

    Each value is followed by a single space (including the last value of a
    row), so every printed row ends with a trailing space.
    """
    print(title)
    print("----------------------")
    for row in m:
        # One "{:.2f} " chunk per entry, joined into a single output line.
        print("".join("{:.2f} ".format(value) for value in row))

In [3]:
def alignment_HMM(seq1, seq2, delta1, delta2, p1, p2, eps1, eps2, tao, match, mismatch, gap):
    """Build three-state pair-HMM parameters, run FB_2dim and print its tables.

    The three states are indexed 0, 1, 2; their result tables are printed
    under the titles "Match", "Insertion" and "Deletion" respectively.

    Parameters
    ----------
    seq1, seq2 : sequences
        The two sequences to align; forwarded unchanged to FB_2dim.
    delta1, delta2 : float
        Transition probability from state 0 to state 1 / to state 2.
    p1 : float
        Transition probability from state 2 to state 1.
    p2 : float
        Transition probability from state 1 to state 2.
    eps1, eps2 : float
        Self-transition probability of state 1 / of state 2.
    tao : float
        Termination probability: subtracted from the "to state 0" entry of
        every transition row and also forwarded to FB_2dim.
    match, mismatch, gap : float
        Emission values forwarded to FB_2dim.

    Returns
    -------
    None
        The three tables are only printed.
    """
    # Row k holds the transitions out of state k; column j is the target state.
    trans_prob = np.asarray([[1 - delta1 - delta2 - tao, delta1, delta2],
                             [1 - eps1 - p2 - tao, eps1, p2],
                             [1 - eps2 - p1 - tao, p1, eps2]])
    # Initial distribution; FB_2dim requires the argument positionally.
    start_prob = np.asarray([1 - delta1 - delta2, delta1, delta2])

    M, I, D = FB_2dim(seq1, seq2, start_prob, trans_prob, match, mismatch, gap, tao)
    print_m(M, "Match")
    print()
    print_m(I, "Insertion")
    print()
    print_m(D, "Deletion")

In [4]:
def FB_2dim(s1, s2, start_prob, trans_prob, match, mismatch, gap, tao):
    """Forward-backward pass of a three-state pair HMM over two sequences.

    State 0 consumes one symbol from each sequence and emits ``match`` when
    the two symbols are equal, ``mismatch`` otherwise. State 1 consumes one
    symbol of ``s1`` only and state 2 one symbol of ``s2`` only; both emit
    ``gap``. Cell ``(i, j)`` of every table corresponds to having consumed
    the first ``i`` symbols of ``s1`` and the first ``j`` symbols of ``s2``.

    Parameters
    ----------
    s1, s2 : sequences
        Anything supporting ``len()`` and integer indexing (e.g. ``str``).
    start_prob : array-like
        Not used: the recursion starts with all mass in state 0 at cell
        (0, 0). Kept so existing callers keep working.
    trans_prob : array-like, shape (3, 3)
        ``trans_prob[k, j]`` is the transition probability from state k to
        state j.
    match, mismatch, gap : float
        Emission values described above.
    tao : float
        Termination probability used to initialise the backward table at the
        final cell.

    Returns
    -------
    tuple of three ndarrays, each of shape (len(s1) + 1, len(s2) + 1)
        For states 0, 1 and 2 in that order: ``alpha * beta`` normalised per
        cell, so the three tables sum to 1 at every cell that carries mass.
        Cells with zero total mass (no complete path passes through them)
        are reported as 0 in all three tables instead of NaN.

    Notes
    -----
    No rescaling or log-space arithmetic is used, so very long inputs may
    underflow.
    """
    n, m = len(s1) + 1, len(s2) + 1
    trans_prob = np.asarray(trans_prob, dtype=float)

    # ---- forward table --------------------------------------------------
    alpha = np.zeros((3, n, m))
    alpha[0, 0, 0] = 1

    # First column: only state 1 (consume s1) can be active.
    for t1 in range(1, n):
        alpha[1, t1, 0] = gap * (alpha[1, t1 - 1, 0] * trans_prob[1, 1]
                                 + trans_prob[0, 1] * alpha[0, t1 - 1, 0])
    # First row: only state 2 (consume s2) can be active.
    for t2 in range(1, m):
        alpha[2, 0, t2] = gap * (alpha[2, 0, t2 - 1] * trans_prob[2, 2]
                                 + trans_prob[0, 2] * alpha[0, 0, t2 - 1])

    for t1 in range(1, n):
        for t2 in range(1, m):
            match_prob = match if s1[t1 - 1] == s2[t2 - 1] else mismatch
            alpha[0, t1, t2] = match_prob * (alpha[:, t1 - 1, t2 - 1] @ trans_prob[:, 0])
            alpha[1, t1, t2] = gap * (alpha[:, t1 - 1, t2] @ trans_prob[:, 1])
            alpha[2, t1, t2] = gap * (alpha[:, t1, t2 - 1] @ trans_prob[:, 2])

    # ---- backward table -------------------------------------------------
    beta = np.zeros((3, n, m))
    beta[:, n - 1, m - 1] = tao

    # Last column: the only possible move consumes a symbol of s1 (state 1).
    for t1 in range(n - 2, -1, -1):
        beta[:, t1, m - 1] = beta[1, t1 + 1, m - 1] * trans_prob[:, 1] * gap
    # Last row: the only possible move consumes a symbol of s2 (state 2).
    for t2 in range(m - 2, -1, -1):
        beta[:, n - 1, t2] = beta[2, n - 1, t2 + 1] * trans_prob[:, 2] * gap

    for t1 in range(n - 2, -1, -1):
        for t2 in range(m - 2, -1, -1):
            # Emission of the *next* aligned pair, i.e. s1[t1] and s2[t2].
            match_prob = match if s1[t1] == s2[t2] else mismatch
            # Emission times backward value for each possible next state.
            next_step = np.asarray([beta[0, t1 + 1, t2 + 1] * match_prob,
                                    beta[1, t1 + 1, t2] * gap,
                                    beta[2, t1, t2 + 1] * gap])
            # Row k of trans_prob weights next_step for current state k.
            beta[:, t1, t2] = trans_prob @ next_step

    # ---- per-cell normalisation -----------------------------------------
    joint = alpha * beta
    cell_total = joint.sum(axis=0)
    # Divide only where the cell carries mass; other cells keep the 0 from
    # `out`, which avoids 0/0 -> NaN and the accompanying RuntimeWarning.
    MID_prob = np.divide(joint, cell_total, out=np.zeros_like(joint), where=cell_total > 0)

    return MID_prob[0, :, :], MID_prob[1, :, :], MID_prob[2, :, :]

# Тесты 1-2

In [5]:
def test1():
    """Example 1: print the alignment tables for "AGAGA" vs "AGAGAGA"."""
    alignment_HMM(
        seq1="AGAGA",
        seq2="AGAGAGA",
        delta1=0.1,
        delta2=0.1,
        p1=0.1,
        p2=0.1,
        eps1=0.3,
        eps2=0.3,
        tao=0.1,
        match=0.9,
        mismatch=0.1,
        gap=1,
    )

In [6]:
def test2():
    """Example 2: print the alignment tables for "ATAGCTACGAC" vs "TGCTAGCTAGC"."""
    alignment_HMM(
        seq1="ATAGCTACGAC",
        seq2="TGCTAGCTAGC",
        delta1=0.1,
        delta2=0.1,
        p1=0.1,
        p2=0.1,
        eps1=0.3,
        eps2=0.3,
        tao=0.1,
        match=0.9,
        mismatch=0.1,
        gap=1,
    )

In [7]:
# Run example 1; it prints the "Match", "Insertion" and "Deletion" tables.
test1()

Match
----------------------
1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 0.97 0.03 0.45 0.04 0.36 0.04 0.28 
0.00 0.06 0.96 0.03 0.62 0.05 0.50 0.05 
0.00 0.41 0.06 0.95 0.03 0.71 0.06 0.54 
0.00 0.05 0.56 0.06 0.94 0.03 0.76 0.06 
0.00 0.32 0.06 0.61 0.06 0.90 0.03 0.74 

Insertion
----------------------
0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
1.00 0.01 0.02 0.02 0.07 0.03 0.07 0.06 
1.00 0.85 0.02 0.04 0.02 0.12 0.04 0.13 
1.00 0.55 0.78 0.02 0.05 0.02 0.17 0.07 
1.00 0.87 0.39 0.71 0.02 0.06 0.02 0.23 
1.00 0.62 0.79 0.31 0.64 0.02 0.06 0.03 

Deletion
----------------------
0.00 1.00 1.00 1.00 1.00 1.00 1.00 1.00 
0.00 0.02 0.95 0.53 0.89 0.60 0.89 0.66 
0.00 0.09 0.02 0.93 0.36 0.83 0.46 0.82 
0.00 0.04 0.16 0.03 0.92 0.27 0.78 0.38 
0.00 0.08 0.05 0.22 0.04 0.91 0.22 0.71 
0.00 0.06 0.15 0.08 0.30 0.07 0.91 0.23 


In [8]:
# Run example 2; it prints the "Match", "Insertion" and "Deletion" tables.
test2()

Match
----------------------
1.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
0.00 0.65 0.14 0.28 0.16 0.60 0.07 0.13 0.09 0.45 0.07 0.07 
0.00 0.62 0.24 0.11 0.77 0.09 0.24 0.10 0.66 0.08 0.16 0.10 
0.00 0.18 0.39 0.22 0.13 0.91 0.09 0.12 0.08 0.78 0.08 0.09 
0.00 0.11 0.81 0.18 0.13 0.09 0.95 0.08 0.08 0.06 0.80 0.07 
0.00 0.10 0.12 0.93 0.09 0.07 0.06 0.96 0.07 0.07 0.06 0.79 
0.00 0.50 0.09 0.08 0.95 0.07 0.05 0.06 0.95 0.07 0.07 0.05 
0.00 0.06 0.28 0.09 0.08 0.92 0.09 0.06 0.06 0.92 0.08 0.07 
0.00 0.09 0.12 0.54 0.06 0.09 0.58 0.66 0.07 0.09 0.56 0.67 
0.00 0.07 0.54 0.07 0.21 0.07 0.68 0.17 0.36 0.11 0.61 0.15 
0.00 0.07 0.10 0.17 0.10 0.46 0.13 0.32 0.16 0.58 0.18 0.26 
0.00 0.07 0.09 0.50 0.10 0.07 0.21 0.63 0.13 0.08 0.25 0.73 

Insertion
----------------------
0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 
1.00 0.13 0.09 0.13 0.26 0.10 0.09 0.07 0.14 0.08 0.11 0.12 
1.00 0.31 0.14 0.10 0.05 0.38 0.21 0.15 0.05 0.25 0.18 0.20 
1.00 0.74 0.40 0.22 0.