In [96]:
import numpy as np
import random
from tqdm import tqdm

In [97]:
def generate_sequence(length):
    """Return a random nucleotide string of ``length`` characters.

    Each character is picked independently with ``random.choice`` from the
    alphabet ``'ACGT'``. The result depends on the state of the global
    ``random`` generator.
    """
    alphabet = 'ACGT'
    bases = []
    for _ in range(length):
        bases.append(random.choice(alphabet))
    return ''.join(bases)

In [98]:
def write_sequences_to_file(file_name, seq_length, seq1, seq2):
    """Write the sequence length and both sequences to ``file_name``.

    The file is opened in write mode (existing content is replaced) and
    receives three lines: ``seq_length``, ``seq1`` and ``seq2``, each
    terminated by a newline.
    """
    with open(file_name, 'w') as out:
        out.writelines(f"{value}\n" for value in (seq_length, seq1, seq2))

In [99]:
def needleman_wunsch(seq1, seq2, match=1, mismatch=-1, gap=-1, show_progress=True):
    """Globally align two sequences with the Needleman-Wunsch algorithm.

    Args:
        seq1: First sequence (any indexable sequence with ``len``, e.g. ``str``).
        seq2: Second sequence.
        match: Score added when the two aligned characters are equal.
        mismatch: Score added when the two aligned characters differ.
        gap: Score added for every gap character (linear gap penalty).
        show_progress: When true (the default), wrap the row loop of the
            matrix fill in ``tqdm`` to display a progress bar. Pass ``False``
            to run silently; ``tqdm`` is then not touched at all.

    Returns:
        A tuple ``(aligned_seq1, aligned_seq2, score)`` where the two aligned
        strings have equal length and use ``'-'`` for gaps, and ``score`` is
        the value of the bottom-right cell of the score matrix.

    Notes:
        The full ``(len(seq1) + 1) x (len(seq2) + 1)`` matrix is kept in
        memory because the traceback needs it. When several moves are
        equally good, the traceback prefers the diagonal move, then a gap in
        ``seq2``, then a gap in ``seq1``.
    """
    n = len(seq1)
    m = len(seq2)

    # Score matrix; row 0 and column 0 hold the cost of aligning a prefix
    # against gaps only.
    score_matrix = [[0] * (m + 1) for _ in range(n + 1)]
    for i in range(1, n + 1):
        score_matrix[i][0] = i * gap
    for j in range(1, m + 1):
        score_matrix[0][j] = j * gap

    # Fill the matrix row by row.
    row_indices = range(1, n + 1)
    if show_progress:
        row_indices = tqdm(row_indices)
    for i in row_indices:
        # Hoist per-row lookups out of the inner loop: they do not depend on j.
        prev_row = score_matrix[i - 1]
        cur_row = score_matrix[i]
        base1 = seq1[i - 1]
        for j in range(1, m + 1):
            match_score = match if base1 == seq2[j - 1] else mismatch
            cur_row[j] = max(
                prev_row[j - 1] + match_score,  # match / substitution
                prev_row[j] + gap,              # gap in seq2
                cur_row[j - 1] + gap,           # gap in seq1
            )

    # Traceback from the bottom-right corner to recover one optimal alignment.
    aligned_seq1 = []
    aligned_seq2 = []
    i, j = n, m

    while i > 0 and j > 0:
        current_score = score_matrix[i][j]
        match_score = match if seq1[i - 1] == seq2[j - 1] else mismatch

        if current_score == score_matrix[i - 1][j - 1] + match_score:
            aligned_seq1.append(seq1[i - 1])
            aligned_seq2.append(seq2[j - 1])
            i -= 1
            j -= 1
        elif current_score == score_matrix[i - 1][j] + gap:
            aligned_seq1.append(seq1[i - 1])
            aligned_seq2.append('-')
            i -= 1
        else:
            aligned_seq1.append('-')
            aligned_seq2.append(seq2[j - 1])
            j -= 1

    # One sequence is exhausted: pad the rest of the other one with gaps.
    while i > 0:
        aligned_seq1.append(seq1[i - 1])
        aligned_seq2.append('-')
        i -= 1
    while j > 0:
        aligned_seq1.append('-')
        aligned_seq2.append(seq2[j - 1])
        j -= 1

    # The traceback collected characters back to front.
    aligned_seq1 = ''.join(reversed(aligned_seq1))
    aligned_seq2 = ''.join(reversed(aligned_seq2))

    return aligned_seq1, aligned_seq2, score_matrix[n][m]


In [100]:
# Seed the global generator so that re-running the notebook from a fresh
# kernel regenerates the same pair of sequences.
SEED = 42
random.seed(SEED)

seq_length = 5000

seq1 = generate_sequence(seq_length)
seq2 = generate_sequence(seq_length)

# Keep a copy of the generated input on disk next to the notebook.
file_name = "sequences.txt"
write_sequences_to_file(file_name, seq_length, seq1, seq2)

In [101]:
# result is the tuple (aligned_seq1, aligned_seq2, score).
result = needleman_wunsch(seq1, seq2)
# Uncomment to also print the two aligned strings (long for large inputs):
# print(result[0])
# print(result[1])
print("Итоговая оценка:", result[-1])


 43%|████▎     | 4304/10000 [00:13<00:18, 315.30it/s]


KeyboardInterrupt: 