<a href="https://colab.research.google.com/github/AnnaPustelnyk/smith-waterman-algorithm/blob/main/Script.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import sys
from Bio import SeqIO
import numpy as np

def read_fasta_file(file_path):
    """Return every sequence found in a FASTA file as a plain string.

    The file is parsed with ``SeqIO.parse(file_path, "fasta")``; for each
    record the parser yields, only ``str(record.seq)`` is kept. The result
    preserves the order in which the parser yields the records and is an
    empty list when no records are produced.
    """
    return [str(entry.seq) for entry in SeqIO.parse(file_path, "fasta")]

def smith_waterman(seq1, seq2, match_score=1, mismatch_score=-1, gap=-2):
    """Compute one best local alignment of two sequences (Smith-Waterman).

    A ``(len(seq1) + 1) x (len(seq2) + 1)`` score matrix is filled where each
    cell is the best of: the diagonal neighbour plus the match/mismatch score,
    the upper neighbour plus ``gap``, the left neighbour plus ``gap``, or 0.
    The traceback starts at the highest-scoring cell and walks back until a
    zero cell or a matrix border is reached.

    Args:
        seq1: First sequence (indexable, e.g. a string).
        seq2: Second sequence (indexable, e.g. a string).
        match_score: Score added when the two aligned symbols are equal.
        mismatch_score: Score added when the two aligned symbols differ.
        gap: Score added for aligning a symbol against a gap (linear penalty).

    Returns:
        A tuple ``(aligned_seq1, aligned_seq2)`` of equal-length strings in
        which ``'-'`` marks a gap. Both are empty when no cell scores above 0
        (including when either input is empty).

    Tie-breaking: when several cells share the maximum score, the first one in
    row-major order is used; during traceback the diagonal move is preferred,
    then a gap in ``seq2``, then a gap in ``seq1``.
    """
    m, n = len(seq1), len(seq2)

    score_matrix = np.zeros((m + 1, n + 1))

    max_score = 0
    max_i, max_j = 0, 0

    for i in range(1, m + 1):
        for j in range(1, n + 1):
            substitution = match_score if seq1[i - 1] == seq2[j - 1] else mismatch_score
            # The trailing 0 is the local-alignment floor; it already clamps
            # every candidate, so the gap terms need no separate clamping.
            best = max(
                score_matrix[i - 1, j - 1] + substitution,
                score_matrix[i - 1, j] + gap,
                score_matrix[i, j - 1] + gap,
                0,
            )
            score_matrix[i, j] = best

            # Strict '>' keeps the first maximal cell in row-major order.
            if best > max_score:
                max_score = best
                max_i, max_j = i, j

    # Collect the alignment back-to-front with O(1) appends and reverse once
    # at the end, instead of inserting at the head of a list on every step.
    reversed_seq1, reversed_seq2 = [], []
    i, j = max_i, max_j

    while i > 0 and j > 0 and score_matrix[i, j] > 0:
        current = score_matrix[i, j]
        substitution = match_score if seq1[i - 1] == seq2[j - 1] else mismatch_score

        if current == score_matrix[i - 1, j - 1] + substitution:
            # Diagonal move: the two symbols are aligned with each other.
            reversed_seq1.append(seq1[i - 1])
            reversed_seq2.append(seq2[j - 1])
            i, j = i - 1, j - 1
        elif current == score_matrix[i - 1, j] + gap:
            # Upward move: symbol of seq1 aligned against a gap in seq2.
            reversed_seq1.append(seq1[i - 1])
            reversed_seq2.append('-')
            i -= 1
        else:
            # Remaining case, leftward move: gap in seq1 against seq2's symbol.
            reversed_seq1.append('-')
            reversed_seq2.append(seq2[j - 1])
            j -= 1

    reversed_seq1.reverse()
    reversed_seq2.reverse()
    return "".join(reversed_seq1), "".join(reversed_seq2)

# Command-line driver. Two invocation forms are accepted:
#   * one FASTA file that contains exactly two records, or
#   * two FASTA files, in which case the first record of each is aligned.
if len(sys.argv) < 2:
    print("Usage: python nw.py <fasta_file> [<second_fasta_file>]")
    sys.exit(1)

if len(sys.argv) == 2:
    # Single-file mode: both sequences must come from the same file.
    file_path = sys.argv[1]
    sequences = read_fasta_file(file_path)

    if len(sequences) != 2:
        print("Please provide exactly 2 sequences in the FASTA file.")
        sys.exit(1)

    seq1, seq2 = sequences
elif len(sys.argv) == 3:
    # Two-file mode: take the first record from each file.
    file_path1 = sys.argv[1]
    file_path2 = sys.argv[2]

    first_records = []
    for path in (file_path1, file_path2):
        records = read_fasta_file(path)
        # An empty or non-FASTA file yields no records; report it instead of
        # failing with an IndexError on records[0].
        if not records:
            print(f"No sequences found in '{path}'.")
            sys.exit(1)
        first_records.append(records[0])

    seq1, seq2 = first_records
else:
    print("Please provide exactly 2 sequence files.")
    sys.exit(1)

# Echo the raw input sequences before aligning them.
print(seq1)
print(seq2)

aligned_seq1, aligned_seq2 = smith_waterman(seq1, seq2)

# Persist the alignment, one labelled line per sequence.
with open("aligned_sequences.txt", "w") as output_file:
    output_file.write(f"Aligned Sequence 1: {aligned_seq1}\n")
    output_file.write(f"Aligned Sequence 2: {aligned_seq2}\n")

print("Alignment saved to 'aligned_sequences.txt'.")

FileNotFoundError: ignored