In [1]:
from array import array
from collections import deque
import numpy as np

In [2]:
def index2d_to_1d(i, j, m):
    """Convert the 2-D index ``(i, j)`` into a flat, row-major offset.

    ``m`` is the row width (number of columns), so row ``i`` starts at
    offset ``i * m`` and column ``j`` is added on top of that.
    """
    row_start = i * m
    return row_start + j

# Выравнивание с аффинными штрафами

In [3]:
def alignment_weights(s, t, match_weight, mismatch_weight, gap_weight, open_gap_weight, gap='_'):
    """Fill the three dynamic-programming score tables for affine-gap alignment.

    Every table is a flat, row-major ``array`` holding
    ``(len(s) + 1) * (len(t) + 1)`` integers; cell ``(i, j)`` is stored at
    offset ``i * (len(t) + 1) + j`` (the same layout as
    ``index2d_to_1d(i, j, len(t) + 1)``).

    Recurrences for ``i, j >= 1``::

        vtable[i, j] = max(vtable[i-1, j] + gap_weight,
                           dtable[i-1, j] + gap_weight + open_gap_weight)
        htable[i, j] = max(htable[i, j-1] + gap_weight,
                           dtable[i, j-1] + gap_weight + open_gap_weight)
        dtable[i, j] = max(dtable[i-1, j-1] + (match or mismatch weight),
                           vtable[i, j], htable[i, j])

    Boundary cells: ``vtable[i, 0]`` is ``i * gap_weight + open_gap_weight``
    and ``htable[0, j]`` is ``j * gap_weight + open_gap_weight``; every other
    boundary cell, including the whole first row and column of ``dtable``,
    stays 0.
    NOTE(review): a zero ``dtable`` boundary means a leading run of gaps adds
    nothing to the score in this recurrence -- confirm that this is intended
    and not a missing boundary initialisation.

    Parameters
    ----------
    s, t : sequences compared element by element with ``==``.
    match_weight, mismatch_weight : int
        Added on the diagonal step when ``s[i-1] == t[j-1]`` / otherwise.
    gap_weight : int
        Added for every gap position.
    open_gap_weight : int
        Added once more, on top of ``gap_weight``, when a gap is opened.
    gap : unused here; kept so the signature mirrors ``alignment``.

    Returns
    -------
    (vtable, htable, dtable) : tuple of three ``array('q')`` objects.
    """
    n = len(s)
    m = len(t)
    width = m + 1
    size = (n + 1) * width

    # 'q' is a signed 64-bit integer. The former 'h' (16-bit) typecode raised
    # OverflowError as soon as any score left the range [-32768, 32767].
    vtable = array('q', [0]) * size
    htable = array('q', [0]) * size
    dtable = array('q', [0]) * size

    # Boundary values; the recurrence below never reads these particular cells.
    for i in range(n + 1):
        vtable[i * width] = i * gap_weight + open_gap_weight
    for j in range(m + 1):
        htable[j] = j * gap_weight + open_gap_weight

    open_cost = gap_weight + open_gap_weight
    for i in range(1, n + 1):
        row = i * width          # offset of cell (i, 0)
        above = row - width      # offset of cell (i - 1, 0)
        for j in range(1, m + 1):
            here = row + j
            up = above + j
            left = here - 1

            vtable[here] = max(vtable[up] + gap_weight, dtable[up] + open_cost)
            htable[here] = max(htable[left] + gap_weight, dtable[left] + open_cost)

            substitution = match_weight if s[i - 1] == t[j - 1] else mismatch_weight
            dtable[here] = max(
                dtable[up - 1] + substitution,  # cell (i - 1, j - 1)
                vtable[here],
                htable[here],
            )
    return vtable, htable, dtable

def alignment(s, t, match_weight, mismatch_weight, gap_weight, open_gap_weight, gap='_'):
    """Align ``s`` and ``t`` using the tables built by ``alignment_weights``.

    The traceback starts in ``dtable`` at cell ``(len(s), len(t))`` and walks
    towards the origin, switching between three states:

    * in ``dtable`` it either takes a diagonal step (pairing ``s[i-1]`` with
      ``t[j-1]``) or, without moving, hands over to ``vtable`` / ``htable``;
    * in ``vtable`` it pairs ``s[i-1]`` with ``gap`` and moves up one row;
    * in ``htable`` it pairs ``gap`` with ``t[j-1]`` and moves left one column.

    Once either index reaches 0, whatever is left of the other sequence is
    paired with ``gap``.

    Returns a pair of strings of equal length: ``s`` and ``t`` with ``gap``
    inserted.
    """
    width = len(t) + 1
    vtable, htable, dtable = alignment_weights(
        s, t, match_weight, mismatch_weight, gap_weight, open_gap_weight, gap
    )

    def cell(table, row, col):
        return table[index2d_to_1d(row, col, width)]

    # Both rows are collected right-to-left and reversed once at the end.
    top, bottom = [], []
    row, col = len(s), len(t)
    state = 'dtable'

    while row > 0 and col > 0:
        if state == 'vtable':
            top.append(s[row - 1])
            bottom.append(gap)
            row -= 1
            # The gap was opened here (not extended) if coming from dtable was strictly better.
            if cell(vtable, row, col) < cell(dtable, row, col) + open_gap_weight:
                state = 'dtable'
            continue

        if state == 'htable':
            top.append(gap)
            bottom.append(t[col - 1])
            col -= 1
            if cell(htable, row, col) < cell(dtable, row, col) + open_gap_weight:
                state = 'dtable'
            continue

        # state == 'dtable': decide which of the three candidates produced this cell.
        if s[row - 1] == t[col - 1]:
            substitution = match_weight
        else:
            substitution = mismatch_weight
        diagonal = cell(dtable, row - 1, col - 1) + substitution
        horizontal = cell(htable, row, col)
        vertical = cell(vtable, row, col)

        if diagonal >= horizontal and diagonal >= vertical:
            top.append(s[row - 1])
            bottom.append(t[col - 1])
            row -= 1
            col -= 1
        elif vertical >= horizontal:
            state = 'vtable'
        else:
            state = 'htable'

    # Pad whichever sequence still has unconsumed leading elements.
    for k in range(row, 0, -1):
        top.append(s[k - 1])
        bottom.append(gap)
    for k in range(col, 0, -1):
        top.append(gap)
        bottom.append(t[k - 1])

    top.reverse()
    bottom.reverse()
    return ''.join(top), ''.join(bottom)

## Пример

Создадим две последовательности с большим совпадающим участком в центре, но различающиеся по краям.

В первом случае штраф за открытие гэпа будет равен -1, а во втором — -2; штраф за каждую позицию гэпа в обоих случаях равен -1.

In [4]:
import random

# Fixed seed so the random flanks are the same on every run.
random.seed(0)

population = 'QWERTYUIOPASDFGHJKLZXCVBNM'
common_subseq = 'BIOINFORMATICS'

# Each sequence is: random prefix + shared core + random suffix.
# The four random.choices calls are evaluated in this exact order
# (20, 12, 6, 30), which determines the draws made from the seeded generator.
seq1 = ''.join(random.choices(population, k=20)) + common_subseq + ''.join(random.choices(population, k=12))
seq2 = ''.join(random.choices(population, k=6)) + common_subseq + ''.join(random.choices(population, k=30))

In [5]:
# Display the first generated sequence.
seq1

'CZAUFAXIDHBFIZJUBMCBBIOINFORMATICSOLBKDESHBMDV'

In [6]:
# Display the second generated sequence.
seq2

'UXGQLABIOINFORMATICSCKQDVUOVTGUMXSEOFNEGLGCGMHHSHA'

In [7]:
# First run: the extra penalty for opening a gap is -1.
aligned_pair = alignment(
    seq1,
    seq2,
    match_weight=1,
    mismatch_weight=-1,
    gap_weight=-1,
    open_gap_weight=-1,
)
print(*aligned_pair, sep='\n')

CZAUFAXIDHBFIZJU_BMCBBIOINFORMATICS______O___LBKDE_________SHBMDV
_______________UXGQLABIOINFORMATICSCKQDVUOVTGUMXSEOFNEGLGCGMHHSHA


In [8]:
# Second run: the extra penalty for opening a gap is -2.
aligned_pair = alignment(
    seq1,
    seq2,
    match_weight=1,
    mismatch_weight=-1,
    gap_weight=-1,
    open_gap_weight=-2,
)
print(*aligned_pair, sep='\n')

CZAUFAXIDHBFIZJUBMCBBIOINFORMATICS__________________OLBKDESHBMDV
______________UXGQLABIOINFORMATICSCKQDVUOVTGUMXSEOFNEGLGCGMHHSHA
