In [1]:
# Import necessary modules
from Bio import AlignIO
from collections import Counter
import sys

# Input configuration: location and format of the multiple sequence alignment.
alignment_file = "H5_GisaidData.clustered.aligned.fasta"  # Replace with your file path
alignment_format = "fasta"               # Replace with the format of your alignment file (e.g., "clustal", "phylip")

# Load the alignment. Diagnostics are written to stderr so that they never mix
# with the frequency report printed on stdout (e.g. when stdout is redirected
# to a file); the script then stops with exit status 1.
try:
    alignment = AlignIO.read(alignment_file, alignment_format)
except FileNotFoundError:
    print(f"Error: The file '{alignment_file}' was not found.", file=sys.stderr)
    sys.exit(1)
except Exception as e:
    # Top-level boundary of the script: report any other read/parse failure
    # with its message instead of a raw traceback.
    print(f"An error occurred while reading the alignment: {e}", file=sys.stderr)
    sys.exit(1)

# Number of columns in the alignment.
alignment_length = alignment.get_alignment_length()

# Plain-string copy of every aligned sequence, in record order.
aligned_rows = [str(entry.seq) for entry in alignment]

# One Counter per alignment column: symbol -> number of sequences carrying it.
# Every character is tallied as-is, so gap characters are counted as well.
position_counts = [
    Counter(row[column] for row in aligned_rows)
    for column in range(alignment_length)
]

# Report, column by column, how often each symbol occurs and its share of the
# column total. Positions are shown 1-based.
print("Amino Acid Frequencies at Each Position:")
for position, tallies in enumerate(position_counts, start=1):
    column_total = sum(tallies.values())
    print(f"\nPosition {position} (Total Sequences: {column_total}):")
    # Symbols appear in the order they were first encountered in the column.
    for symbol in tallies:
        occurrences = tallies[symbol]
        share = occurrences / column_total
        print(f"  {symbol}: {occurrences} ({share:.2%})")

Amino Acid Frequencies at Each Position:

Position 1 (Total Sequences: 591):
  -: 590 (99.83%)
  X: 1 (0.17%)

Position 2 (Total Sequences: 591):
  -: 590 (99.83%)
  X: 1 (0.17%)

Position 3 (Total Sequences: 591):
  -: 590 (99.83%)
  X: 1 (0.17%)

Position 4 (Total Sequences: 591):
  -: 590 (99.83%)
  X: 1 (0.17%)

Position 5 (Total Sequences: 591):
  -: 590 (99.83%)
  X: 1 (0.17%)

Position 6 (Total Sequences: 591):
  -: 589 (99.66%)
  X: 1 (0.17%)
  L: 1 (0.17%)

Position 7 (Total Sequences: 591):
  -: 588 (99.49%)
  E: 1 (0.17%)
  X: 1 (0.17%)
  S: 1 (0.17%)

Position 8 (Total Sequences: 591):
  M: 461 (78.00%)
  -: 103 (17.43%)
  R: 1 (0.17%)
  W: 1 (0.17%)
  X: 24 (4.06%)
  K: 1 (0.17%)

Position 9 (Total Sequences: 591):
  E: 447 (75.63%)
  H: 1 (0.17%)
  -: 98 (16.58%)
  K: 14 (2.37%)
  R: 1 (0.17%)
  D: 2 (0.34%)
  P: 1 (0.17%)
  X: 25 (4.23%)
  W: 1 (0.17%)
  G: 1 (0.17%)

Position 10 (Total Sequences: 591):
  K: 358 (60.58%)
  N: 69 (11.68%)
  -: 95 (16.07%)
  E: 15 (2.54%)
