# In [3]:
import html
import random

from Bio import AlignIO

# === INPUTS ===
# Clustal alignment to render, and the 1-based alignment columns to highlight.
msa_file = "/home/hp/nayanika/github/GPX6/analysis/structure/clustalo-I20250513-124120-0721-98866692-p1m.aln-clustal_num"
positions_to_mark = [
    3, 4, 48, 52, 47, 99, 54, 177, 144, 178, 74,
    143, 139, 87, 142, 102, 104, 107, 24, 60, 181, 173,
]
# The HTML report is written next to the input file.
output_html = f"{msa_file}_colored.html"

# Highlight colours. The palette is shorter than the list of positions, so the
# mapping built below wraps around and reuses colours.
color_palette = [
    "#E41A1C", "#377EB8", "#4DAF4A", "#984EA3",  # red, blue, green, purple
    "#FF7F00", "#FFFF33", "#A65628", "#F781BF",  # orange, yellow, brown, pink
    "#1B9E77", "#D95F02", "#7570B3", "#66A61E",  # teal, vermillion, violet, lime
]

# Each position keeps one colour for every sequence, chosen by its index in
# positions_to_mark (cycling through the palette).
position_colors = {
    column: color_palette[slot % len(color_palette)]
    for slot, column in enumerate(positions_to_mark)
}

# Load the alignment: parse the Clustal-format file at msa_file into one
# alignment object; its records are iterated later when the HTML is written.
# NOTE(review): AlignIO.read is documented to raise ValueError unless the file
# holds exactly one alignment — confirm against the Biopython version in use.
alignment = AlignIO.read(msa_file, "clustal")

# === HELPER FUNCTIONS ===
def mark_sequence_html(seq, positions, colors):
    """Return *seq* as an HTML fragment with selected columns highlighted.

    Args:
        seq: One aligned sequence as text; it is walked character by
            character, counting columns from 1.
        positions: Iterable of 1-based column indices to highlight.
        colors: Mapping from each highlighted column index to the CSS colour
            used as that column's background.

    Returns:
        A string in which every character at a highlighted column is wrapped
        in a styled ``<span>`` and every other character is copied unchanged.

    Raises:
        KeyError: If a column listed in *positions* has no entry in *colors*.
    """
    # Build the lookup once: set membership is O(1), whereas testing against
    # the caller's list would rescan it for every residue.
    highlighted = set(positions)
    # Collect the fragments and join once instead of growing a string with +=.
    pieces = [
        f"<span style='background-color:{colors[column]}; color:white; font-weight:bold'>{residue}</span>"
        if column in highlighted
        else residue
        for column, residue in enumerate(seq, 1)
    ]
    return "".join(pieces)

# === WRITE HTML FILE ===
# Renders every record of `alignment` as a labelled <pre> row with the marked
# columns highlighted, appends a colour legend, and prints a short summary.
# Explicit UTF-8 keeps the bytes on disk consistent with the <meta charset>
# declared in the page, regardless of the platform's locale encoding.
with open(output_html, "w", encoding="utf-8") as f:
    # Write HTML header with enhanced styling
    f.write("""<!DOCTYPE html>
<html>
<head>
    <meta charset="utf-8">
    <title>Colored Multiple Sequence Alignment</title>
    <style>
        body { font-family: Arial, sans-serif; background-color: #f5f5f5; margin: 20px; }
        .container { background-color: white; padding: 20px; border-radius: 5px; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
        pre { font-family: 'Courier New', monospace; font-size: 14px; line-height: 1.4; overflow-x: auto; }
        .seq-id { font-weight: bold; color: #333; margin-top: 10px; }
        .legend { margin-top: 20px; border-top: 1px solid #ddd; padding-top: 10px; }
        .legend-item { display: inline-block; margin-right: 15px; margin-bottom: 5px; }
        .legend-box { display: inline-block; width: 15px; height: 15px; margin-right: 5px; vertical-align: middle; }
    </style>
</head>
<body>
    <div class="container">
        <h2>Multiple Sequence Alignment with Highlighted Positions</h2>
        <div class="alignment">
""")

    # One labelled row per sequence. The ID is escaped because it is copied
    # verbatim from the alignment file and may contain &, < or >.
    for record in alignment:
        seq_id = html.escape(record.id)
        marked = mark_sequence_html(str(record.seq), positions_to_mark, position_colors)
        f.write(f'<div class="seq-id">{seq_id}</div>\n<pre>{marked}</pre>\n\n')

    # Close <div class="alignment"> so the legend becomes its sibling rather
    # than being nested inside it.
    f.write("</div>\n")

    # Legend: one swatch per distinct highlighted position, in ascending order.
    f.write('<div class="legend">\n<h3>Position Legend:</h3>\n')
    for pos, color in sorted(position_colors.items()):
        f.write(f'<div class="legend-item"><span class="legend-box" style="background-color:{color};"></span>Position {pos}</div>\n')

    # Close the legend div, then the container div, then the document.
    f.write("</div>\n</div>\n</body>\n</html>")

# Report how many colours are actually in use: the palette is cycled, so
# positions can share a colour and must not be described as uniquely coloured.
distinct_colors = len(set(position_colors.values()))
print(f"✅ Enhanced colored alignment saved as HTML:\n{output_html}")
print(f"   - {len(position_colors)} positions highlighted using {distinct_colors} distinct colors")
print("   - Legend added for easier reference")

# Output:
# ✅ Enhanced colored alignment saved as HTML:
# /home/hp/nayanika/github/GPX6/analysis/structure/clustalo-I20250513-124120-0721-98866692-p1m.aln-clustal_num_colored.html
#    - 22 positions highlighted with unique colors
#    - Legend added for easier reference
