# 1. Выравнивание с использованием алгоритма Needleman-Wunsch

In [2]:
def needleman_wunsch(seq1, seq2, mismatch, indel, match=1):
    """Build the Needleman-Wunsch global alignment score table.

    Args:
        seq1: First sequence; one table row per element (plus the border row).
        seq2: Second sequence; one table column per element (plus the border
            column).
        mismatch: Score added when the two compared elements differ.
        indel: Score added for every gap (insertion or deletion).
        match: Score added when the two compared elements are equal.

    Returns:
        A list of ``len(seq1) + 1`` rows, each with ``len(seq2) + 1`` entries.
        Entry ``[i][j]`` is the best score for aligning ``seq1[:i]`` with
        ``seq2[:j]``; the bottom-right entry is the score of the full
        alignment.
    """
    width = len(seq2) + 1

    # Border row: aligning an empty prefix of seq1 costs one gap per column.
    table = [[indel * col for col in range(width)]]

    for row, row_item in enumerate(seq1, start=1):
        above = table[-1]
        # Border column: aligning an empty prefix of seq2 costs one gap per row.
        current = [indel * row]
        for col, col_item in enumerate(seq2, start=1):
            if row_item == col_item:
                pair_score = match
            else:
                pair_score = mismatch
            diagonal = above[col - 1] + pair_score
            from_above = above[col] + indel
            from_left = current[col - 1] + indel
            current.append(max(diagonal, from_above, from_left))
        table.append(current)

    return table

In [3]:
# Three sequence pairs to align: sN1 is aligned against sN2.
s11, s12 = 'GACGAAG', 'ACCAAG'
s21, s22 = 'CGTCTT', 'CATTCT'
s31, s32 = 'ATGTCAC', 'ATCTCC'

In [6]:
# Scoring penalties per sequence pair: suffix N is used with pair sN1/sN2.
mismatch1, indel1 = -1, -1
mismatch2, indel2 = -1, -2
mismatch3, indel3 = -2, -2

In [7]:
# Fill one score table per sequence pair, using that pair's own penalties
# (match keeps its default of 1).
res1 = needleman_wunsch(s11, s12, mismatch=mismatch1, indel=indel1)
res2 = needleman_wunsch(s21, s22, mismatch=mismatch2, indel=indel2)
res3 = needleman_wunsch(s31, s32, mismatch=mismatch3, indel=indel3)

In [8]:
res1

[[0, -1, -2, -3, -4, -5, -6],
 [-1, -1, -2, -3, -4, -5, -4],
 [-2, 0, -1, -2, -2, -3, -4],
 [-3, -1, 1, 0, -1, -2, -3],
 [-4, -2, 0, 0, -1, -2, -1],
 [-5, -3, -1, -1, 1, 0, -1],
 [-6, -4, -2, -2, 0, 2, 1],
 [-7, -5, -3, -3, -1, 1, 3]]

In [9]:
res2

[[0, -2, -4, -6, -8, -10, -12],
 [-2, 1, -1, -3, -5, -7, -9],
 [-4, -1, 0, -2, -4, -6, -8],
 [-6, -3, -2, 1, -1, -3, -5],
 [-8, -5, -4, -1, 0, 0, -2],
 [-10, -7, -6, -3, 0, -1, 1],
 [-12, -9, -8, -5, -2, -1, 0]]

In [10]:
res3

[[0, -2, -4, -6, -8, -10, -12],
 [-2, 1, -1, -3, -5, -7, -9],
 [-4, -1, 2, 0, -2, -4, -6],
 [-6, -3, 0, 0, -2, -4, -6],
 [-8, -5, -2, -2, 1, -1, -3],
 [-10, -7, -4, -1, -1, 2, 0],
 [-12, -9, -6, -3, -3, 0, 0],
 [-14, -11, -8, -5, -5, -2, 1]]

In [11]:
def traceback(dp, seq1, seq2, mismatch, indel, match=1):
    """Recover one optimal global alignment from a filled score table.

    Walks from ``dp[len(seq1)][len(seq2)]`` back to ``dp[0][0]``. At every
    cell the moves are tried in a fixed order: diagonal (match/mismatch),
    then up (gap in ``seq2``), then left (gap in ``seq1``), so exactly one
    alignment is returned even when several are optimal.

    Args:
        dp: Score table with ``len(seq1) + 1`` rows and ``len(seq2) + 1``
            columns, filled with the same scoring parameters passed here.
        seq1: First sequence (table rows).
        seq2: Second sequence (table columns).
        mismatch: Score of aligning two different elements.
        indel: Score of a single gap.
        match: Score of aligning two equal elements.

    Returns:
        A tuple ``(aligned_seq1, aligned_seq2, score)`` where gaps are shown
        as ``"-"`` and ``score`` is ``dp[len(seq1)][len(seq2)]``.
    """
    n = len(seq1)
    m = len(seq2)

    # Alignment columns are collected back-to-front and reversed once at the
    # end, which avoids rebuilding the strings on every step.
    column1 = []
    column2 = []
    i = n
    j = m

    while i > 0 or j > 0:
        if i > 0 and j > 0:
            pair_score = match if seq1[i - 1] == seq2[j - 1] else mismatch
            if dp[i][j] == dp[i - 1][j - 1] + pair_score:
                column1.append(seq1[i - 1])
                column2.append(seq2[j - 1])
                i -= 1
                j -= 1
                continue

        # On the left border (j == 0) moving up is the only legal step, so it
        # is taken without the equality check. With float penalties the
        # border value indel * i may differ from dp[i - 1][0] + indel by a
        # rounding error, and a failed check would otherwise index seq2[-1]
        # and push j below zero.
        if i > 0 and (j == 0 or dp[i][j] == dp[i - 1][j] + indel):
            column1.append(seq1[i - 1])
            column2.append("-")
            i -= 1
        else:
            # Reached only with j > 0: either i == 0 (top border) or no
            # other move reproduced dp[i][j].
            column1.append("-")
            column2.append(seq2[j - 1])
            j -= 1

    score = dp[n][m]
    return "".join(reversed(column1)), "".join(reversed(column2)), score

In [12]:
# Print the (aligned seq1, aligned seq2, score) tuple for each sequence pair.
print(traceback(res1, s11, s12, mismatch=mismatch1, indel=indel1))
print(traceback(res2, s21, s22, mismatch=mismatch2, indel=indel2))
print(traceback(res3, s31, s32, mismatch=mismatch3, indel=indel3))

('GACGAAG', '-ACCAAG', 3)
('CGTCTT', 'CATTCT', 0)
('ATGTCAC', 'ATCTC-C', 1)


# 2. Выравнивание с использованием библиотеки Biopython

In [14]:
pip install biopython

Collecting biopython
  Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading biopython-1.85-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m31.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.85


In [15]:
from Bio import pairwise2
from Bio.pairwise2 import format_alignment

def align_and_print(seq1, seq2, pair_name, mismatch, indel, match=1):
    """Globally align two sequences with Bio.pairwise2 and print the result.

    Args:
        seq1: First sequence.
        seq2: Second sequence.
        pair_name: Label printed in the header line for this pair.
        mismatch: Mismatch score passed to ``globalms``.
        indel: Gap score; passed for both trailing penalty arguments of
            ``globalms``.
        match: Match score passed to ``globalms``.

    Returns:
        None. The header, the formatted first alignment and its score (or a
        "not found" message) are written to standard output.
    """
    print(f"--- Alignment for Pair: {pair_name} ---")

    # Global alignment with the given scoring parameters.
    alignments = pairwise2.align.globalms(seq1, seq2, match, mismatch, indel, indel)

    print("\nGlobal Alignment (Best):")
    if not alignments:
        print("No global alignment found.")
        return

    # Take the first returned alignment and treat it as the best one.
    top_alignment = alignments[0]
    print(format_alignment(*top_alignment))
    # Index 2 of the alignment holds its score.
    print(f"Score: {top_alignment[2]}")


# Run the alignment for every sequence pair with that pair's penalties.
align_and_print(s11, s12, "Sequence Pair 1", mismatch=mismatch1, indel=indel1)
align_and_print(s21, s22, "Sequence Pair 2", mismatch=mismatch2, indel=indel2)
align_and_print(s31, s32, "Sequence Pair 3", mismatch=mismatch3, indel=indel3)

--- Alignment for Pair: Sequence Pair 1 ---

Global Alignment (Best):
GACGAAG
 ||.|||
-ACCAAG
  Score=3

Score: 3.0
--- Alignment for Pair: Sequence Pair 2 ---

Global Alignment (Best):
CGTCTT
|.|..|
CATTCT
  Score=0

Score: 0.0
--- Alignment for Pair: Sequence Pair 3 ---

Global Alignment (Best):
ATGTCAC
||.|| |
ATCTC-C
  Score=1

Score: 1.0


