In [None]:
# Generated with help of Claude Sonnet 4.1 


from Bio import AlignIO
from collections import Counter
from Bio.SeqRecord import SeqRecord
from Bio.Align import MultipleSeqAlignment
from Bio.Seq import Seq


def analyze_site_patterns(phylip_file, output_file, output_format='fasta'):
    """Remove invariant sites from an alignment and write the filtered result.

    The input is read as a relaxed-PHYLIP alignment. A site (column) is kept
    only if it contains more than one distinct unambiguous nucleotide
    (A, C, G or T, compared case-insensitively). Every other character
    (gaps, N, ambiguity codes, ...) is ignored when deciding variability, so
    a column holding a single nucleotide plus gaps, or no A/C/G/T at all,
    counts as invariant and is dropped. Kept characters are copied unchanged
    (original case preserved).

    Args:
        phylip_file: Path or handle of the relaxed-PHYLIP alignment to read.
        output_file: Path or handle the filtered alignment is written to.
        output_format: 'fasta' or 'phylip' (case-insensitive). 'phylip'
            writes relaxed PHYLIP.

    Returns:
        The filtered ``MultipleSeqAlignment`` containing only variable sites;
        each record keeps the id and description of its source record.

    Raises:
        ValueError: If ``output_format`` is neither 'fasta' nor 'phylip'.
            This is checked before any file is read, so a bad format does
            not cost a full pass over the alignment.
    """
    # Resolve the writer format first: fail fast on a typo instead of after
    # the whole alignment has been parsed and filtered.
    writer_formats = {"fasta": "fasta", "phylip": "phylip-relaxed"}
    try:
        writer_format = writer_formats[output_format.lower()]
    except KeyError:
        raise ValueError("output_format must be 'fasta' or 'phylip'") from None

    alignment = AlignIO.read(phylip_file, "phylip-relaxed")
    total_sites = alignment.get_alignment_length()
    print(f"Original alignment: {len(alignment)} sequences, {total_sites} sites")

    # Plain strings are much cheaper to index and iterate than Seq objects,
    # so convert each record exactly once and reuse it below.
    sequences = [str(record.seq) for record in alignment]
    nucleotides = frozenset("ACGT")

    # Find variable positions: zip(*sequences) yields one tuple per column.
    variable_positions = []
    for pos, column in enumerate(zip(*sequences)):
        observed = {char.upper() for char in column} & nucleotides
        if len(observed) > 1:
            variable_positions.append(pos)
    invariant_count = total_sites - len(variable_positions)

    print(f"Removing {invariant_count} invariant sites")
    print(f"Keeping {len(variable_positions)} variable sites")

    # Create new alignment with only variable sites
    new_records = [
        SeqRecord(
            Seq(''.join(sequence[pos] for pos in variable_positions)),
            id=record.id,
            description=record.description,
        )
        for record, sequence in zip(alignment, sequences)
    ]
    new_alignment = MultipleSeqAlignment(new_records)

    AlignIO.write(new_alignment, output_file, writer_format)

    print(f"Filtered alignment written to {output_file}")
    return new_alignment

# Usage: keep only the variable sites of the input alignment and write them
# out via the 'phylip' output option. `data_dir` must already be defined
# (it is not set in this cell).
p = analyze_site_patterns(
    phylip_file=data_dir / "dme-ldpruned.min4.phy",
    output_file=data_dir / "dme-ldpruned.min4.filtered-ivariant.phy",
    output_format="phylip",
)