In [12]:
from Bio import AlignIO
import os

# === CONFIGURATION ===
# Path of the alignment file to render; main() parses it with the "clustal" format.
msa_file = "/home/hp/nayanika/github/GPX6/analysis/alignment/clustalo-I20250513-124120-0721-98866692-p1m.aln-clustal_num"
max_sequences = 3  # Show only first N sequences
# The HTML figure is written to the input path with this suffix appended.
output_html = msa_file + "_paper_singleline_large.html"

# --- Highlight Positions ---
# Both lists are compared against 1-based indices into each aligned sequence
# string (gap characters are counted), see format_aligned_sequence().
# Positions drawn with color_mutation.
positions_mutation_table = [
    3,4,16,22,24,25,27,29,30,31,33,35,40,47,48,49,52,54,60,67,69,71,74,
    87,99,102,104,107,119,120,126,137,139,142,143,144,148,173,177,178,
    181,182,184,188,192,194,195,196,197
]

# Positions drawn with color_marked; when a position is in both lists this
# colour takes precedence (see get_position_color_map()).
positions_to_mark = [3,4,48,52,47,99,54,177,144,178,74,143,139,87,142,102,104,107,24,60,181,173]

# Colors (CSS hex values inserted into inline "background-color" styles)
color_mutation = "#FFD700"  # Yellow
color_marked = "#FF0000"    # Red

# === FUNCTIONS ===
def validate_file(filepath):
    """Check that *filepath* points to an existing regular file.

    Args:
        filepath: Path of the MSA file that is about to be read.

    Returns:
        True when the path exists and is a file.

    Raises:
        FileNotFoundError: If the path does not exist, or exists but is not
            a regular file (for example a directory).
    """
    # isfile() rather than exists(): a directory would otherwise pass the
    # check here and only fail later, inside the alignment parser.
    if not os.path.isfile(filepath):
        raise FileNotFoundError(f"MSA file not found: {filepath}")
    return True

def get_position_color_map():
    """Build the position -> highlight colour lookup.

    Every entry of ``positions_mutation_table`` is assigned
    ``color_mutation``; entries of ``positions_to_mark`` are then assigned
    ``color_marked``, replacing the mutation colour where a position is
    present in both lists.

    Returns:
        dict mapping each listed position to its colour string.
    """
    colour_by_position = dict.fromkeys(positions_mutation_table, color_mutation)
    # Applied second so the marked colour overrides the mutation colour.
    colour_by_position.update(dict.fromkeys(positions_to_mark, color_marked))
    return colour_by_position

def format_aligned_sequence(aligned_seq, position_colors):
    """Render one aligned sequence as a run of HTML ``<span>`` elements.

    Each character becomes its own span. The character ``"-"`` gets the CSS
    class ``gap``; every other character gets ``aa``. Characters whose
    1-based index is a key of *position_colors* additionally get the
    ``highlighted`` class and an inline background colour.

    Args:
        aligned_seq: The aligned sequence as a string (gaps included).
        position_colors: Mapping of 1-based index -> CSS colour string.

    Returns:
        The concatenated HTML markup; an empty string for an empty sequence.
    """
    spans = []
    for position, aa in enumerate(aligned_seq, start=1):  # 1-based indexing
        is_gap = aa == "-"
        css_class = "gap" if is_gap else "aa"

        if position not in position_colors:
            spans.append(f"<span class='{css_class}'>{aa}</span>")
            continue

        color = position_colors[position]
        # Gaps keep dark, slightly transparent text; residues use bold white.
        if is_gap:
            text_style = "color: black; opacity: 0.8;"
        else:
            text_style = "color: white; font-weight: bold;"
        spans.append(
            f'<span class="{css_class} highlighted" '
            f'style="background-color: {color}; {text_style}">{aa}</span>'
        )

    # Single join instead of repeated string concatenation inside the loop.
    return "".join(spans)

def generate_html_content(alignment, position_colors):
    """Assemble the complete HTML page for the single-line MSA figure.

    The page consists of a fixed head (markup and CSS), one ``sequence-row``
    per displayed record, and a fixed closing section. At most
    ``max_sequences`` records (module-level setting) are rendered, taken from
    the start of *alignment*.

    Args:
        alignment: Indexable collection of records exposing ``id`` and ``seq``.
        position_colors: Mapping of 1-based position -> CSS colour string,
            forwarded to ``format_aligned_sequence``.

    Returns:
        The HTML document as a single string.
    """
    page_head = """<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>MSA Paper Figure</title>
    <style>
        html, body {
            margin: 0;
            padding: 0;
            height: 100%;
            width: 100%;
            overflow: auto; /* allow scrolling instead of shrinking */
            background: white;
            font-family: 'Courier New', monospace;
        }

        .msa-container {
            display: flex;
            flex-direction: column;
            align-items: flex-start;
            justify-content: flex-start;
            padding: 20px;
        }

        h2 {
            font-size: 36px;
            margin-bottom: 30px;
            color: #2c3e50;
            text-align: center;
            width: 100%;
        }

        /* Sequence rows */
        .sequence-row {
            white-space: nowrap;
            display: flex;
            align-items: center;
            margin: 20px 0;
        }

        /* Sequence name */
        .seq-name {
            display: inline-block;
            width: 280px; /* wider space for labels */
            font-weight: bold;
            margin-right: 20px;
            font-size: 28px;
            color: #2c3e50;
            text-align: right;
        }

        /* Sequence text */
        .sequence {
            font-size: 40px; /* Larger font for publication */
            letter-spacing: 6px; /* Spacing between residues */
            line-height: 1.6;
            display: inline-block;
        }

        /* Residues */
        .aa {
            color: #2c3e50;
            font-weight: 600;
            padding: 2px 6px;
        }

        .gap {
            color: #95a5a6;
            opacity: 0.7;
            padding: 2px 6px;
        }

        /* Highlighting */
        .highlighted {
            border-radius: 8px;
            padding: 6px 10px;
            margin: 0 2px; /* spacing between highlights */
        }
    </style>
</head>
<body>
    <div class="msa-container">
        <h2>Multiple Sequence Alignment</h2>
"""
    page_tail = """
    </div>
</body>
</html>
"""

    parts = [page_head]

    # Never index past the end of the alignment, never exceed the configured cap.
    row_count = min(len(alignment), max_sequences)
    for row_index in range(row_count):
        entry = alignment[row_index]

        # Labels longer than 30 characters are cut to 27 plus an ellipsis.
        if len(entry.id) <= 30:
            label = entry.id
        else:
            label = entry.id[:27] + "..."

        residues_html = format_aligned_sequence(str(entry.seq), position_colors)
        parts.append(f"""
        <div class="sequence-row">
            <span class="seq-name">{label}</span>
            <span class="sequence">{residues_html}</span>
        </div>
""")

    parts.append(page_tail)
    return "".join(parts)

# === MAIN ===
def main():
    """Run the pipeline: validate input, read alignment, write the HTML figure.

    Reads ``msa_file`` in Clustal format, renders it via
    ``generate_html_content`` and writes the result to ``output_html`` as
    UTF-8. Any exception is caught here, reported on stdout together with a
    printed traceback, and not re-raised.
    """
    try:
        validate_file(msa_file)
        print(f"Reading alignment from: {msa_file}")

        msa = AlignIO.read(msa_file, "clustal")
        color_lookup = get_position_color_map()
        page = generate_html_content(msa, color_lookup)

        with open(output_html, "w", encoding="utf-8") as handle:
            handle.write(page)

        print(f"Large single-line MSA figure generated: {output_html}")
    except Exception as exc:
        # Top-level boundary: show the message and the full traceback
        # instead of letting the exception propagate.
        import traceback
        print(f"Error: {exc}")
        traceback.print_exc()

# Run the pipeline only when this file is executed as the main module,
# not when it is imported.
if __name__ == "__main__":
    main()


Reading alignment from: /home/hp/nayanika/github/GPX6/analysis/alignment/clustalo-I20250513-124120-0721-98866692-p1m.aln-clustal_num
Large single-line MSA figure generated: /home/hp/nayanika/github/GPX6/analysis/alignment/clustalo-I20250513-124120-0721-98866692-p1m.aln-clustal_num_paper_singleline_large.html
