In [4]:
try:
    from Bio import AlignIO
    from Bio.Align import AlignInfo
except ModuleNotFoundError:
    print("Biopython is not installed or cannot be imported.")
    print("Please install Biopython using: pip install biopython")
    exit()

def read_clustal_alignment(alignment_file):
    """Load a Clustal-format alignment, reporting failures on stdout.

    Args:
        alignment_file: Path (or handle) forwarded to ``AlignIO.read``.

    Returns:
        Whatever ``AlignIO.read(alignment_file, "clustal")`` returns, or
        ``None`` when reading raised an exception. Errors are printed,
        never propagated.
    """
    loaded = None
    try:
        loaded = AlignIO.read(alignment_file, "clustal")
    except FileNotFoundError:
        print(f"File not found: {alignment_file}")
    except ValueError as parse_error:
        print(f"Error reading alignment: {parse_error}")
    except Exception as unexpected:
        # Deliberate best-effort: any other failure is reported and mapped to None.
        print(f"An error occurred: {unexpected}")
    return loaded

def calculate_identity(alignment):
    """Compute each row's percent identity to the alignment consensus.

    Identity is measured against the consensus produced by
    ``SummaryInfo.dumb_consensus()``, not pairwise between rows: a column
    counts as a match when the row's character equals the consensus
    character at that column.

    Args:
        alignment: An alignment object that is iterable over records
            (each with ``.id`` and ``.seq``) and provides
            ``get_alignment_length()``.

    Returns:
        A list of ``(record_id, row_length, identity_percent)`` tuples, one
        per record. ``row_length`` is the length of the aligned row as
        stored in the alignment (gap characters included).
        ``identity_percent`` is ``0.0`` when the alignment has zero columns.
    """
    summary = AlignInfo.SummaryInfo(alignment)
    # NOTE(review): dumb_consensus() appears to be deprecated in newer
    # Biopython releases — confirm against the installed version.
    # Converted to str once so the per-record loop compares plain characters.
    consensus = str(summary.dumb_consensus())
    alignment_len = alignment.get_alignment_length()

    identities = []
    for record in alignment:
        seq = str(record.seq)
        matches = sum(1 for a, b in zip(seq, consensus) if a == b)
        # Guard: a zero-column alignment would otherwise raise ZeroDivisionError.
        identity = matches / alignment_len * 100 if alignment_len else 0.0
        identities.append((record.id, len(seq), identity))

    return identities

def main(alignment_file="/home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/alignment.aln"):
    """Load a Clustal alignment and print identity figures and sequences.

    Args:
        alignment_file: Path to the Clustal-format alignment file. Defaults
            to the path originally hard-coded in this cell; pass your own
            path to analyse a different file.

    Prints the alignment length, one identity line per record (as returned
    by ``calculate_identity``), then each record in a FASTA-like layout.
    Prints a failure message when the alignment is falsy (``None`` or empty).
    """
    alignment = read_clustal_alignment(alignment_file)

    if alignment:
        print(f"Alignment loaded from file: {alignment_file}")
        print(f"Alignment length: {alignment.get_alignment_length()}")

        identities = calculate_identity(alignment)
        for record_id, seq_len, identity in identities:
            print(f"Sequence {record_id}: Length = {seq_len}, Identity = {identity:.2f}%")

        # Print sequences and their IDs
        for record in alignment:
            print(f">{record.id}")
            print(record.seq)
            print()  # Print an empty line for separation
    else:
        print("Failed to load the alignment.")

if __name__ == "__main__":
    main()

Alignment loaded from file: /home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/alignment.aln
Alignment length: 197
Sequence original_humancys: Length = 197, Identity = 76.65%
Sequence original_mousecys: Length = 197, Identity = 75.63%
>original_humancys
PQNRKVDCNKGVTGTIYEYGALTLNGEEYIQFKQFAGKHVLFVNVAAYCGLAAQYPELNALQEELKNFGVIVLAFPCNQFGKQEPGTNSEILLGLKYVCPGSGFVPSFQLFEKGDVNGEKEQKVFTFLKNSCPPTSDLLGSSSQLFWEPMKVHDIRWNFEKFLVGPDGVPVMHWFHQAPVSTVKSDILEYLKQFNTH

>original_mousecys
PQKSKVDCNKGVTGTVYEYGANTIDGGEFVNFQQYAGKHILFVNVASFCGLTATYPELNTLQEELKPFNVTVLGFPCNQFGKQEPGKNSEILLGLKYVRPGGGYVPNFQLFEKGDVNGDNEQKVFSFLKNSCPPTSELFGSPEHLFWDPMKVHDIRWNFEKFLVGPDGVPVMRWFHHTPVRIVQSDIMEYLNQTS--



In [5]:
try:
    from Bio import AlignIO
except ModuleNotFoundError:
    print("Biopython is not installed or cannot be imported.")
    print("Please install Biopython using: pip install biopython")
    exit()

def read_clustal_alignment(alignment_file):
    """Load a Clustal-format alignment, reporting failures on stdout.

    Args:
        alignment_file: Path (or handle) forwarded to ``AlignIO.read``.

    Returns:
        Whatever ``AlignIO.read(alignment_file, "clustal")`` returns, or
        ``None`` when reading raised an exception. Errors are printed,
        never propagated.
    """
    loaded = None
    try:
        loaded = AlignIO.read(alignment_file, "clustal")
    except FileNotFoundError:
        print(f"File not found: {alignment_file}")
    except ValueError as parse_error:
        print(f"Error reading alignment: {parse_error}")
    except Exception as unexpected:
        # Deliberate best-effort: any other failure is reported and mapped to None.
        print(f"An error occurred: {unexpected}")
    return loaded

def find_different_positions(seq1, seq2):
    """List the columns at which two sequences disagree.

    The sequences are walked in parallel; comparison stops at the end of
    the shorter one. Any unequal pair of characters counts as a difference.

    Args:
        seq1: First sequence (any iterable of characters).
        seq2: Second sequence (any iterable of characters).

    Returns:
        A list of ``(position, res1, res2)`` tuples, where ``position`` is
        the 1-based column index and ``res1``/``res2`` are the differing
        characters from ``seq1`` and ``seq2`` respectively.
    """
    return [
        (column, left, right)
        for column, (left, right) in enumerate(zip(seq1, seq2), start=1)  # 1-based positions
        if left != right
    ]

def main(alignment_file="/home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/alignment.aln"):
    """Load a two-sequence Clustal alignment and print every differing column.

    Args:
        alignment_file: Path to the Clustal-format alignment file. Defaults
            to the path originally hard-coded in this cell; pass your own
            path to analyse a different file.

    Prints one line per differing position (1-based), or a message when the
    two rows are identical. Prints an error message when the alignment
    could not be loaded or does not hold exactly two records.
    """
    alignment = read_clustal_alignment(alignment_file)

    if alignment and len(alignment) == 2:
        print(f"Alignment loaded from file: {alignment_file}")

        seq1 = str(alignment[0].seq)
        seq2 = str(alignment[1].seq)

        # Find positions with different amino acids
        differences = find_different_positions(seq1, seq2)

        if differences:
            print("Differences:")
            for pos, res1, res2 in differences:
                print(f"Position {pos}: Sequence 1 - {res1}, Sequence 2 - {res2}")
        else:
            print("No differences found between the two sequences.")
    else:
        print("Alignment could not be loaded or does not contain exactly two sequences.")

if __name__ == "__main__":
    main()

Alignment loaded from file: /home/hp/nayanika/github/PhD_Thesis/EVB/protein_stepwise/GPX6MUT/alignment.aln
Differences:
Position 3: Sequence 1 - N, Sequence 2 - K
Position 4: Sequence 1 - R, Sequence 2 - S
Position 16: Sequence 1 - I, Sequence 2 - V
Position 22: Sequence 1 - L, Sequence 2 - N
Position 24: Sequence 1 - L, Sequence 2 - I
Position 25: Sequence 1 - N, Sequence 2 - D
Position 27: Sequence 1 - E, Sequence 2 - G
Position 29: Sequence 1 - Y, Sequence 2 - F
Position 30: Sequence 1 - I, Sequence 2 - V
Position 31: Sequence 1 - Q, Sequence 2 - N
Position 33: Sequence 1 - K, Sequence 2 - Q
Position 35: Sequence 1 - F, Sequence 2 - Y
Position 40: Sequence 1 - V, Sequence 2 - I
Position 47: Sequence 1 - A, Sequence 2 - S
Position 48: Sequence 1 - Y, Sequence 2 - F
Position 52: Sequence 1 - A, Sequence 2 - T
Position 54: Sequence 1 - Q, Sequence 2 - T
Position 60: Sequence 1 - A, Sequence 2 - T
Position 67: Sequence 1 - N, Sequence 2 - P
Position 69: Sequence 1 - G, Sequence 2 - N
Po