<a href="https://colab.research.google.com/github/AbitathaRoy/SeqAlign-Mutation-Finder/blob/main/SeqAlign_Mutation_Finder.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Install Biopython

In [None]:
!pip install biopython

# Import Modules

In [None]:
from Bio import SeqIO
from Bio.Align import PairwiseAligner

# Load Sequences

In [None]:
# Load sequences from FASTA files
def read_fasta(filename):
    """Return the sequence of the record stored in a FASTA file.

    Args:
        filename: Path (or handle) passed straight to ``SeqIO.read``.

    Returns:
        The ``seq`` attribute of the parsed record.

    Note:
        ``SeqIO.read`` is meant for files holding exactly one record; per the
        Biopython documentation it raises ``ValueError`` otherwise.
    """
    return SeqIO.read(filename, "fasta").seq

# Both paths are relative, so the files must sit in the notebook's working directory.
reference_seq = read_fasta("reference.fna")  # Replace with your reference file
target_seq = read_fasta("target.fna")  # Replace with your target file


# Sequence Alignment

In [None]:
# Initialize aligner
aligner = PairwiseAligner()
aligner.mode = 'global'  # Use global alignment (Needleman-Wunsch)
# Scoring scheme: +1 per matching column, -1 per mismatching column
aligner.match_score = 1
aligner.mismatch_score = -1
# Opening a gap (-2) is penalised more heavily than extending one (-1)
aligner.open_gap_score = -2
aligner.extend_gap_score = -1

# Perform alignment (reference is the first row, target the second)
alignments = aligner.align(reference_seq, target_seq)

# Print the first alignment returned; later cells also work on alignments[0]
print(alignments[0])


# Mutations and Insertions/Deletions

In [None]:
# Collect substitutions and insertions/deletions from the first alignment

# Gapped rows of the alignment: row 0 is the reference, row 1 is the target
aligned_ref = alignments[0][0]
aligned_target = alignments[0][1]

# Both dictionaries map a 1-based alignment column to (reference_base, target_base)
mutations = {}
indels = {}

# Walk the two rows in lockstep and classify every column that differs
for position, (ref_base, target_base) in enumerate(zip(aligned_ref, aligned_target), start=1):
    if ref_base == target_base:
        continue

    if "-" in (ref_base, target_base):
        # Gap in the reference row = insertion in target;
        # gap in the target row = deletion in target
        indels[position] = (ref_base, target_base)
    else:
        # Two different bases: substitution (mutation)
        mutations[position] = (ref_base, target_base)

# Print differences
def display_changes():
    """Print every recorded difference one position per line.

    Reads the module-level ``mutations`` and ``indels`` dictionaries and
    prints the substitutions first, then the indels, each section sorted by
    position.
    """

    def _show(heading, changes):
        # One "Position N: ref → target" line per recorded position.
        print(heading)
        for position in sorted(changes):
            entry = changes[position]
            print(f"Position {position}: {entry[0]} → {entry[1]}")

    _show("Mutations:", mutations)
    _show("\nIndels:", indels)

def displayChangedSequence(mutatedList, start, end):
    """Format, print and return one run of consecutive changes.

    Args:
        mutatedList: Sequence of ``(reference_base, target_base)`` pairs.
        start: Position label of the first change in the run.
        end: Position label of the last change in the run.

    Returns:
        The line that was printed, in the form
        ``"Position <start> to <end>: <reference bases> → <target bases>"``.
    """
    # Concatenate each side of the pairs into one string per sequence.
    reference_side = "".join(pair[0] for pair in mutatedList)
    target_side = "".join(pair[1] for pair in mutatedList)

    output = f"Position {start} to {end}: {reference_side} → {target_side}"
    print(output)
    return output


def _write_change_runs(changes, file):
    """Group consecutive positions of ``changes`` into runs and report each run.

    Args:
        changes: Dict mapping a position to ``(reference_base, target_base)``.
        file: Open text file; one line per run is written to it.

    Every run is passed to ``displayChangedSequence`` (which prints it) and
    the returned line is written to ``file``.
    """
    run = []
    run_start = None
    prev_pos = None

    for pos in sorted(changes):
        if prev_pos is not None and pos - prev_pos == 1:
            # Directly follows the previous position: extend the current run.
            run.append(changes[pos])
        else:
            # Gap in positions: emit the finished run (if any), start a new one.
            if run:
                file.write(displayChangedSequence(run, run_start, prev_pos) + "\n")
            run_start = pos
            run = [changes[pos]]
        prev_pos = pos

    # Emit the final run, which the loop above never flushes on its own.
    if run:
        file.write(displayChangedSequence(run, run_start, prev_pos) + "\n")


def displayChanges(output_path="mutations_output.txt"):
    """Print and save the mutations and indels, grouped into consecutive runs.

    Reads the module-level ``mutations`` and ``indels`` dictionaries. Adjacent
    positions within one category are merged into a single
    "Position <start> to <end>" line. The two categories are processed
    independently, so the last run of mutations is reported under
    "Mutations:" and never merges with, or leaks into, the "Indels:" section.

    Args:
        output_path: File the report is written to (overwritten if present).
    """
    # Explicit UTF-8: the report lines contain "→", which not every platform's
    # default encoding can represent.
    with open(output_path, "w", encoding="utf-8") as file:
        file.write("Mutations:\n")
        print("Mutations:")
        _write_change_runs(mutations, file)

        file.write("\nIndels:\n")
        print("\nIndels:")
        _write_change_runs(indels, file)

# Print the grouped report and write it to mutations_output.txt
displayChanges()