In [5]:
from Bio import SeqIO
from Bio.Align import PairwiseAligner

def read_sequence(file_path):
    """Return the sequence stored in a FASTA-style file as a single string.

    The first line of the file is always discarded (it is treated as the
    description line). Every remaining line is stripped of surrounding
    whitespace and the pieces are concatenated in file order.

    Args:
        file_path: Path of the file to read.

    Returns:
        The concatenated sequence text; an empty string if the file has at
        most one line.
    """
    with open(file_path, 'r') as handle:
        # Consume the description line; the default keeps an empty file from
        # raising StopIteration.
        next(handle, None)
        pieces = [raw_line.strip() for raw_line in handle]
    return ''.join(pieces)

def local_sequence_alignment(healthy_seq_file, mutated_seq_file):
    """Locally align a mutated sequence against a healthy one and print the result.

    Both files are loaded with ``read_sequence`` and aligned with a
    ``PairwiseAligner`` in ``'local'`` mode. For each alignment visited, the
    alignment itself, its score and the span it covers in the healthy
    (target) sequence are printed. Iteration stops after the first alignment
    whose score is positive.

    Args:
        healthy_seq_file: Path to the file holding the healthy sequence
            (used as the alignment target).
        mutated_seq_file: Path to the file holding the mutated sequence
            (used as the alignment query).

    Returns:
        None. Results are written to standard output.
    """
    # Read healthy and mutated sequences from .fna files
    healthy_seq = read_sequence(healthy_seq_file)
    mutated_seq = read_sequence(mutated_seq_file)

    # Initialize the local-mode aligner.
    # NOTE(review): no scoring parameters are set here, so the aligner's
    # defaults apply; consider explicit mismatch/gap penalties if the
    # resulting alignments look too fragmented.
    aligner = PairwiseAligner()
    aligner.mode = 'local'

    # Perform alignment
    alignments = aligner.align(healthy_seq, mutated_seq)

    # Check if mutated sequence aligns within the healthy sequence
    for alignment in alignments:
        print(alignment)
        print(f"Alignment score: {alignment.score}")

        # Row 0 of `coordinates` holds the target (healthy) positions at every
        # block boundary. The first column is where the alignment starts and
        # the LAST column is where it ends; column 1 is only the end of the
        # first aligned block, which is wrong for any gapped alignment.
        target_coordinates = alignment.coordinates[0]
        start, end = target_coordinates[0], target_coordinates[-1]
        print(f"Alignment position in healthy sequence: Start - {start}, End - {end}")

        # Exit after the first meaningful alignment
        if alignment.score > 0:
            break

# Example usage
# Both paths are relative, so they are resolved against the current working
# directory when the files are opened.
healthy_seq_file = 'gene_DNA_Assembly_GRCh38.fna'
mutated_seq_file = 'Mutation_train/Mutation_train1.fna'
# Runs the local alignment and prints the alignment, its score and the
# aligned span in the healthy sequence; nothing is returned.
local_sequence_alignment(healthy_seq_file, mutated_seq_file)

target            0 GGGTTTCTGCAGAGTCTACTTCAGAAGCGGAGGCACTGGGAGTCCGGTTTGGGATTGCCA
                  0 ||||------|-|----|-----|--|---|---|-----|------------|-----|
query             0 GGGT------A-A----A-----G--G---A---A-----A------------A-----A

target           60 GGCTGTGGTTGTGAGTCTGAGCTTGTGAGCGGCTGTGGCGCCCCAACTCTTCGCCAGCAT
                 60 -------------||-|-|--|-----||---------------|--||||-|---|---
query            14 -------------AG-C-G--C-----AG---------------A--TCTT-G---G---

target          120 ATCATCCCGGCAGGTAACCTCAGGCTCCAAGGGGCGGCCCCGGTCCCTGGCTGTGGAGGG
                120 --------|-----|-------|--|----||------------------------|||-
query            28 --------G-----T-------G--T----GG------------------------AGG-

target          180 GTGGCTCTAATTCCGCAGAAGGCAGGAATGGGGTAAAGGAAAAAAGCGCAGATCTTGGGT
                180 ----|---|-----|-------------|-----------------|--|-|----||--
query            37 ----C---A-----G-------------T-----------------C--A-A----GG--

target          240 GTGG