# Сравнение результатов для глобального и локального выравнивания двух последовательностей

In [133]:
import numpy as np
# The two sequences that are aligned in the cells below.
a, b = "AATCGAAGTCA", "AACGCAGAAT"

# Scoring scheme: reward for equal symbols, penalty for a gap,
# penalty for two different symbols.
match, gap, mismatch = 3, -2, -1

In [None]:
def score(x, y):
    """Return the score for placing symbols ``x`` and ``y`` in one column.

    The gap symbol is ``"_"``. The result is the module-level ``gap`` if
    either symbol is a gap, ``match`` if the two symbols are equal, and
    ``mismatch`` otherwise.
    """
    # Gaps are tested first: otherwise two gap symbols compare equal and
    # would be rewarded as a match.
    if x == "_" or y == "_":
        return gap
    if x == y:
        return match
    return mismatch

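Аналогично алгоритму Нидлмана-Вунша заполняем матрицу весов, но для локального выравнивания первая строка и первый столбец остаются нулевыми, а отрицательные значения в ячейках заменяются нулём: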

In [135]:
def score_matrix(seq1, seq2, score_fun, gap=-2):
    """Build the local-alignment score matrix for ``seq1`` and ``seq2``.

    Args:
        seq1: sequence laid out along the columns of the matrix.
        seq2: sequence laid out along the rows of the matrix.
        score_fun: callable ``score_fun(x, y)`` giving the score for
            aligning symbol ``x`` of ``seq1`` with symbol ``y`` of ``seq2``.
        gap: score added for a step that inserts a gap (default ``-2``).

    Returns:
        A float NumPy array of shape ``(len(seq2) + 1, len(seq1) + 1)``.
        Row 0 and column 0 are zero; every other cell is the best of the
        diagonal, left and top moves, floored at 0.
    """
    n, m = len(seq1), len(seq2)
    # np.zeros already provides the zero first row and first column.
    mat = np.zeros((m + 1, n + 1))

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            top = mat[i - 1][j] + gap
            left = mat[i][j - 1] + gap
            diag = mat[i - 1][j - 1] + score_fun(seq1[j - 1], seq2[i - 1])
            # The 0 floor lets an alignment start at any cell.
            mat[i][j] = max(diag, left, top, 0)
    return mat

Возвращаемся назад от клетки с максимальным значением до нуля, собирая выравнивание

In [145]:
def traceback(mat, seq1, seq2, score_fun=None, gap_penalty=None):
    """Print the local alignment recovered from the score matrix ``mat``.

    Starts at the first cell holding the maximum of ``mat`` and walks back
    until a zero cell or the first row/column is reached, then prints the
    two aligned strings on separate lines (gaps shown as ``"_"``).

    Args:
        mat: NumPy score matrix indexed ``[row, column]``; rows follow
            ``seq2`` and columns follow ``seq1``, as the indexing below uses.
        seq1: sequence along the columns.
        seq2: sequence along the rows.
        score_fun: scoring callable ``score_fun(x, y)``; when ``None`` the
            module-level ``score`` is used.
        gap_penalty: score of a gap step; when ``None`` the module-level
            ``gap`` is used.

    Raises:
        ValueError: if a non-zero cell cannot be explained by any of its
            three neighbours, i.e. ``mat`` was not built with this scoring.
    """
    if score_fun is None:
        score_fun = score
    if gap_penalty is None:
        gap_penalty = gap

    # Use the matrix passed in, not a global one.
    i, j = np.unravel_index(mat.argmax(), mat.shape)
    # Symbols are collected back-to-front and reversed once at the end.
    al1 = []
    al2 = []
    while i > 0 and j > 0 and mat[i, j] != 0:
        current = mat[i, j]
        diag = mat[i - 1, j - 1]
        left = mat[i, j - 1]
        top = mat[i - 1, j]
        if current == diag + score_fun(seq1[j - 1], seq2[i - 1]):
            al1.append(seq1[j - 1])
            al2.append(seq2[i - 1])
            i -= 1
            j -= 1
        elif current == left + gap_penalty:
            # Move left: a symbol of seq1 is aligned with a gap.
            al1.append(seq1[j - 1])
            al2.append('_')
            j -= 1
        elif current == top + gap_penalty:
            # Move up: a symbol of seq2 is aligned with a gap.
            al1.append('_')
            al2.append(seq2[i - 1])
            i -= 1
        else:
            # Without this the loop would never terminate.
            raise ValueError(
                "score matrix is inconsistent with the given scoring"
            )
    # No tail handling: the walk ends at a zero cell or at the border.
    print("".join(reversed(al1)), "".join(reversed(al2)), sep="\n")

Для сравнения будем использовать алгоритм Нидлмана-Вунша из предыдущего домашнего задания:

In [143]:
def needleman_wunsch(a, b, score_fun = score, gap=-2):
    """Print a global alignment of ``a`` and ``b`` (Needleman-Wunsch).

    Fills a ``(len(b) + 1) x (len(a) + 1)`` score matrix whose first row
    and column hold accumulated gap penalties, then walks back from the
    bottom-right cell to the top-left one and prints the two aligned
    strings on separate lines (gaps shown as ``"_"``).

    Args:
        a: sequence laid out along the columns of the matrix.
        b: sequence laid out along the rows of the matrix.
        score_fun: callable ``score_fun(x, y)`` scoring a pair of symbols.
        gap: score added for every gap step (default ``-2``).
    """
    n, m = len(a), len(b)
    mat = np.zeros((m + 1, n + 1))

    # Border cells: a prefix aligned entirely against gaps.
    for i in range(m + 1):
        mat[i][0] = gap * i
    for j in range(n + 1):
        mat[0][j] = gap * j

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            top = mat[i - 1][j] + gap
            left = mat[i][j - 1] + gap
            diag = mat[i - 1][j - 1] + score_fun(a[j - 1], b[i - 1])
            mat[i][j] = max(diag, left, top)

    # Aligned symbols are collected back-to-front and reversed at the end.
    row1 = []
    row2 = []
    i, j = m, n
    while i > 0 and j > 0:
        current = mat[i][j]
        # Tie-breaking order: diagonal, then left, then top.
        if current == mat[i - 1][j - 1] + score_fun(a[j - 1], b[i - 1]):
            row1.append(a[j - 1])
            row2.append(b[i - 1])
            i -= 1
            j -= 1
        elif current == mat[i][j - 1] + gap:
            # Move left: a symbol of ``a`` against a gap.
            row1.append(a[j - 1])
            row2.append('_')
            j -= 1
        else:
            # Move up: a symbol of ``b`` against a gap. The cell was
            # computed as the max of the three moves, so this is the
            # only remaining possibility.
            row1.append('_')
            row2.append(b[i - 1])
            i -= 1

    # One sequence is exhausted: pad the rest of the other with gaps.
    while j > 0:
        row1.append(a[j - 1])
        row2.append('_')
        j -= 1
    while i > 0:
        row1.append('_')
        row2.append(b[i - 1])
        i -= 1

    print("".join(reversed(row1)), "".join(reversed(row2)), sep = "\n")

In [144]:
# Global alignment of the two sequences; the function prints its result.
needleman_wunsch(a,b)
# Emit blank lines between the two printed alignments.
print("\n")
# Local alignment: build the score matrix, then print the traced-back alignment.
M = score_matrix(a, b, score)
traceback(M, a, b)

AATCGAAGTCA_
AA_CGCAG_AAT


AATCGAAG
AA_CGCAG
