In [None]:
import json
import pandas as pd
import numpy as np
from IPython.display import HTML, display
from collections import defaultdict
import re
import os
import sys
from typing import Dict, List, Any, Optional, Tuple
from core.metrics import calculate_repetition_metric, get_top_repetitive_words, get_multi_prompt_ngrams, calculate_slop_index_new

# --- Add core directory to Python path ---
# os.path.abspath('./') resolves to the current working directory, so
# SCRIPT_DIR ends up being the *parent* of the directory this is run from.
SCRIPT_DIR = os.path.split(os.path.abspath('./'))[0]
CORE_DIR = os.path.join(SCRIPT_DIR, 'core')
# Prepend the core directory to the import search path, but only once so
# re-running this cell does not stack duplicate entries.
if sys.path.count(CORE_DIR) == 0:
    sys.path.insert(0, CORE_DIR)

# Model identifiers to leave out of processing. Both the provider-prefixed
# name and its bare form are listed.
# NOTE(review): the consumer of this list is not visible in this part of the
# file -- confirm where the filtering happens.
MODELS_TO_IGNORE = ['mistralai/ministral-3b', 'ministral-3b']

# Substitution table: model identifier as stored in the run data (key) ->
# name to show instead (value). Identifiers without an entry are used as-is.
# NOTE(review): 'openai/chatgpt-4o-latest' and the bare 'chatgpt-4o-latest'
# map to different dated snapshots -- confirm this is intentional.
MODEL_NAME_SUBS = {
    "deepseek/deepseek-r1":
        "deepseek-ai/DeepSeek-R1",
    "deepseek/deepseek-chat-v3-0324":
        "deepseek-ai/DeepSeek-V3-0324",
    "anthropic/claude-3.5-sonnet":
        "claude-3-5-sonnet-20241022",
    "openai/chatgpt-4o-latest":
        "chatgpt-4o-latest-2025-01-29",
    "anthropic/claude-3.7-sonnet":
        "claude-3-7-sonnet-20250219",
    "openai/gpt-4.5-preview":
        "gpt-4.5-preview",
    "cohere/command-a":
        "CohereForAI/c4ai-command-a-03-2025",
    "anthropic/claude-3.5-haiku":
        "claude-3-5-haiku-20241022",
    "google/gemini-2.0-flash-001":
        "gemini-2.0-flash-001",
    "openai/gpt-4o-mini":
        "gpt-4o-mini",
    "mistralai/mistral-nemo":
        "mistralai/Mistral-Nemo-Instruct-2407",
    "mistralai/mistral-small-3.1-24b-instruct":
        "mistralai/Mistral-Small-3.1-24B-Instruct-2503",
    "mistralai/mistral-small-24b-instruct-2501":
        "mistralai/Mistral-Small-24B-Instruct-2501",
    "mistralai/ministral-3b":
        "ministral-3b",
    "chatgpt-4o-latest":
        "chatgpt-4o-latest-2025-03-27",
    "rekaai/reka-flash-3:free":
        "RekaAI/reka-flash-3",
}

# --- Helper function to update model name if a substitution exists ---
def get_updated_model_name(original: str) -> str:
    """Return the substitute name for *original*, if one is registered.

    Args:
        original: Model identifier to look up in ``MODEL_NAME_SUBS``.

    Returns:
        The mapped name when ``MODEL_NAME_SUBS`` has an entry for
        *original*; otherwise *original* itself, unchanged.
    """
    if original in MODEL_NAME_SUBS:
        return MODEL_NAME_SUBS[original]
    return original

# --- Import metrics functions ---
try:
    from core.metrics import calculate_slop_index, calculate_complexity_index
except ImportError as import_error:
    # Report the failure on stderr, then install stand-ins so that later
    # calls do not crash; -1.0 marks "metric unavailable".
    for notice in (
        f"Error importing metrics from core.metrics: {import_error}",
        "Please ensure core/metrics.py exists and is in the Python path.",
    ):
        print(notice, file=sys.stderr)

    def calculate_slop_index(text: str) -> float:
        """Stub used when core.metrics is unavailable; always returns -1.0."""
        return -1.0

    def calculate_complexity_index(text: str) -> float:
        """Stub used when core.metrics is unavailable; always returns -1.0."""
        return -1.0

# Config variables
# JSON file holding the benchmark runs (read by generate_model_report).
RUNS_FILE = "creative_bench_runs.json"
# JSON file holding the ELO results (read by generate_model_report).
ELO_RESULTS_FILE = "elo_results.json"
# NOTE(review): presumably the output path for ELO results enriched with
# metrics -- its use is not visible in this part of the file.
ELO_RESULTS_UPDATED_FILE = "elo_results_with_metrics.json"

# Display order of prompt ids inside a report. Ids are strings; any id that
# is not listed here is sorted after all listed ones.
PROMPTS_ORDER = [
    "25", "9", "8", "33", "31", "4", "3", "32",
    "20", "30", "15", "19", "18", "7", "28", "6",
    "5", "16", "1", "2", "10", "11", "12", "13",
    "14", "17", "21", "22", "23", "24", "26", "29",
]

# --- Existing Functions (load_json_file, sanitize_model_name, etc.) ---
def load_json_file(file_path: str) -> Dict:
    """Load and return the parsed contents of a JSON file (best effort).

    Problems are reported with a printed message and an empty dict is
    returned instead of raising, so callers can treat "missing" and
    "unreadable" files the same way.

    Args:
        file_path: Path of the JSON file to read; it is decoded as UTF-8.

    Returns:
        The decoded JSON document, or ``{}`` when the file does not exist
        or its contents are not valid UTF-8 encoded JSON.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return {}
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except FileNotFoundError:
        # The file vanished between the existence check and the open call.
        print(f"File not found: {file_path}")
    except (json.JSONDecodeError, UnicodeDecodeError):
        # UnicodeDecodeError is raised while reading bytes that are not valid
        # UTF-8; it is not a JSONDecodeError, so it must be listed explicitly
        # to honour the "return {} on undecodable input" contract.
        print(f"Error decoding JSON from {file_path}")
    return {}

def sanitize_model_name(model_name: str) -> str:
    """Return *model_name* rewritten so it can be used in a filename.

    Every forward slash becomes a double underscore; afterwards each of the
    characters ``< > : " | ? *`` and the backslash becomes a hyphen.
    """
    # One translation table handles all single-character substitutions.
    hyphenate = str.maketrans({symbol: '-' for symbol in r'<>:"|?*\\'})
    return model_name.replace("/", "__").translate(hyphenate)

# --- Updated generate_model_report Function ---
def generate_model_report(model_name: str, run_key: Optional[str] = None, save_to_file: bool = False) -> HTML:
    """
    Generate an HTML report for a specific model with theme and font selection,
    including a back button and dark mode toggle.

    Args:
        model_name: The name of the model to generate the report for
        run_key: Optional specific run key to use
        save_to_file: Whether to save the report to an HTML file

    Returns:
        An HTML object containing the report
    """
    # --- Data Loading and Processing (Identical to previous version) ---
    runs_data = load_json_file(RUNS_FILE)
    elo_data = load_json_file(ELO_RESULTS_FILE)

    if run_key is None:
        matching_runs = [k for k, v in runs_data.items() if v.get("test_model") == model_name]
        if not matching_runs:
            return HTML(f"<h2>No runs found for model: {model_name}</h2>")
        run_key = matching_runs[-1]

    if run_key not in runs_data:
        return HTML(f"<h2>Run key not found: {run_key}</h2>")

    run_data = runs_data[run_key]
    original_model_name = run_data.get("test_model", model_name)
    display_model_name = get_updated_model_name(original_model_name) # Use updated name for display

    creative_tasks = run_data.get("creative_tasks", {})
    if not creative_tasks:
        return HTML(f"<h2>No creative tasks found for run: {run_key}</h2>")

    # --- Data Processing (Identical) ---
    creative_prompts = {}
    try:
        # Adjust path relative to SCRIPT_DIR if needed
        creative_prompts_file = run_data.get("creative_prompts_file", os.path.join(SCRIPT_DIR, "data/creative_writing_prompts_v3.json"))
        if os.path.exists(creative_prompts_file):
            creative_prompts = load_json_file(creative_prompts_file)
        else:
             print(f"Warning: Creative prompts file not found at {creative_prompts_file}")
    except Exception as e:
        print(f"Warning: Could not load creative prompts: {str(e)}")

    neg_criteria = []
    try:
        neg_criteria_file = 'data/negative_criteria.txt'
        if os.path.exists(neg_criteria_file):
            with open(neg_criteria_file, 'r') as f:
                neg_criteria = [line.strip().lower() for line in list(f.readlines())]
        else:
            print(f"Warning: {neg_criteria_file} not found. Negative criteria scoring adjustment will not be applied.")
    except FileNotFoundError:
        print(f"Warning: {neg_criteria_file} not found. Negative criteria scoring adjustment will not be applied.")

    iterations = {}
    for iter_idx, prompt_data in creative_tasks.items():
        iter_score_sum = 0
        iter_score_count = 0
        for prompt_id, task_data in prompt_data.items():
            if task_data.get("status") not in ["completed", "judged"]: continue
            results_by_mod = task_data.get("results_by_modifier", {})
            for seed_mod, block in results_by_mod.items():
                j_scores = block.get("judge_scores", {})
                for metric, val in j_scores.items():
                    if isinstance(val, (int, float)):
                        iter_score_sum += (20 - val) if metric.lower() in neg_criteria else val
                        iter_score_count += 1
        iterations[iter_idx] = {
            "score": round(iter_score_sum / iter_score_count, 2) if iter_score_count > 0 else 0,
            "prompts": prompt_data
        }
    sorted_iterations = sorted(iterations.items(), key=lambda x: x[1]["score"], reverse=True)
    # --- End Data Processing ---


    # --- HTML Generation with Themes, Fonts, Back Button, Dark Mode ---
    html_output = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Model Outputs: {display_model_name}</title>
        <meta name="viewport" content="width=device-width, initial-scale=1">
        <style>
            /* ----------------------------------------------------
            1) Font Imports & Face Definitions
            ---------------------------------------------------- */
            /* Lora (Used for Cozy Headers) */
            @import url('https://fonts.googleapis.com/css2?family=Lora:ital,wght@0,400..700;1,400..700&display=swap');
            /* Merriweather (Used for Modern Headers - fallback) */
            @import url('https://fonts.googleapis.com/css2?family=Merriweather:ital,wght@0,300;0,400;0,700;1,300;1,400;1,700&display=swap');

            /* Dynamic font loading will be handled with JavaScript */
            /* We'll keep the font declarations in CSS for fallback purposes in case JS fails */

            /* ----------------------------------------------------
            2) Base Variables & Font Defaults
            ---------------------------------------------------- */
            :root {{
                /* Default Theme: Cozy Light */
                --theme-name: 'cozy'; /* JS uses this */

                /* Fonts */
                --font-body-cozy: 'Tiempos Text', Georgia, serif;
                --font-heading-cozy: 'Lora', serif;
                --font-body-modern: 'Inter', sans-serif; /* Changed modern default body */
                --font-heading-modern: 'Besley', 'Merriweather', serif;
                --font-ui: 'Lora', sans-serif; /* For controls */

                /* Default to Cozy fonts */
                --font-body: var(--font-body-cozy);
                --font-heading: var(--font-heading-cozy);

                /* Cozy Light Colors */
                --bg-color: #fdfaf6;
                --text-color: #3a3a3a;
                --header-color: #5c4033;
                --subheader-color: #7a6a60;
                --border-color: #e0dcd1;
                --accent-border-color: #d3c0a5;
                --container-bg: #fffcf7;
                --iter-header-bg: #f5f0e8;
                --iter-header-hover-bg: #ede8de;
                --prompt-header-bg: #faf5ef;
                --prompt-header-hover-bg: #f5f0e8;
                --judge-bg: #f3f6f9;
                --judge-border: #c8d7e6;
                --judge-text: #555;
                --prompt-display-bg: #f9f6f0;
                --toggle-icon-color: #8a7a70;
                --shadow-color: rgba(0, 0, 0, 0.08);
                --link-color: #7a6a60;
                --link-hover-color: #5c4033;
                --toggle-bg: #ccc; /* Not used visually now */
                --toggle-checked-bg: #7a6a60; /* Not used visually now */
                --toggle-knob-bg: white; /* Not used visually now */
                --select-text-color: var(--subheader-color);
                --select-chevron-color: var(--subheader-color);
                --select-bg: transparent;
                --select-border: none;
            }}

            /* ----------------------------------------------------
            3) Cozy Dark Mode Variables
            ---------------------------------------------------- */
            body.theme-cozy.dark-mode {{
                --bg-color: #2a2527;
                --text-color: #fff9f2;
                --header-color: #f7eee0;
                --subheader-color: #e9dfd0;
                --border-color: #3e3936;
                --accent-border-color: #6a5349;
                --container-bg: #312c2e;
                --iter-header-bg: #342e2f;
                --iter-header-hover-bg: #413935;
                --prompt-header-bg: #312b2d;
                --prompt-header-hover-bg: #3a3234;
                --judge-bg: #2f3136;
                --judge-border: #4e4944;
                --judge-text: #fcf5eb;
                --prompt-display-bg: #302a2c;
                --toggle-icon-color: #c0b0a0;
                --shadow-color: #0c0705;
                --link-color: #d0bca8;
                --link-hover-color: #ebdac5;
                --toggle-bg: #524740; /* Not used visually now */
                --toggle-checked-bg: #9a8778; /* Not used visually now */
                --toggle-knob-bg: #ede6dc; /* Not used visually now */
                --select-text-color: var(--subheader-color);
                --select-chevron-color: var(--subheader-color);
            }}

            /* ----------------------------------------------------
            4) Modern Theme Variables (Light & Dark)
            ---------------------------------------------------- */
            body.theme-modern {{
                --theme-name: 'modern'; /* JS uses this */

                /* Fonts */
                --font-body: var(--font-body-modern);
                --font-heading: var(--font-heading-modern);

                /* Modern Light Colors */
                --bg-color: #ffffff;
                --text-color: #212529;
                --header-color: #000000;
                --subheader-color: #495057;
                --border-color: #dee2e6;
                --accent-border-color: #adb5bd;
                --container-bg: #ffffff;
                --iter-header-bg: #f8f9fa;
                --iter-header-hover-bg: #e9ecef;
                --prompt-header-bg: #ffffff;
                --prompt-header-hover-bg: #f8f9fa;
                --judge-bg: #f1f3f5;
                --judge-border: #ced4da;
                --judge-text: #343a40;
                --prompt-display-bg: #f8f9fa;
                --toggle-icon-color: #6c757d;
                --shadow-color: rgba(0, 0, 0, 0.1);
                --link-color: #007bff;
                --link-hover-color: #0056b3;
                --toggle-bg: #ced4da; /* Not used visually now */
                --toggle-checked-bg: #007bff; /* Not used visually now */
                --toggle-knob-bg: white; /* Not used visually now */
                --select-text-color: var(--subheader-color);
                --select-chevron-color: var(--subheader-color);
            }}

            body.theme-modern.dark-mode {{
                /* Modern Dark Colors */
                --bg-color: #1a1a1a;
                --text-color: #e9ecef;
                --header-color: #ffffff;
                --subheader-color: #adb5bd;
                --border-color: #495057;
                --accent-border-color: #6c757d;
                --container-bg: #212529;
                --iter-header-bg: #343a40;
                --iter-header-hover-bg: #495057;
                --prompt-header-bg: #2c3034;
                --prompt-header-hover-bg: #343a40;
                --judge-bg: #343a40;
                --judge-border: #495057;
                --judge-text: #ced4da;
                --prompt-display-bg: #343a40;
                --toggle-icon-color: #adb5bd;
                --shadow-color: rgba(0, 0, 0, 0.3);
                --link-color: #69b1ff;
                --link-hover-color: #a8d1ff;
                --toggle-bg: #495057; /* Not used visually now */
                --toggle-checked-bg: #0d6efd; /* Not used visually now */
                --toggle-knob-bg: #dee2e6; /* Not used visually now */
                --select-text-color: var(--subheader-color);
                --select-chevron-color: var(--subheader-color);
            }}


            /* ----------------------------------------------------
            5) Base Global Styles (Theme Independent)
            ---------------------------------------------------- */
            body {{
                font-family: var(--font-body);
                line-height: 1.7;
                color: var(--text-color);
                background-color: var(--bg-color);
                max-width: 900px;
                margin: 30px auto;
                padding: 40px 50px;
                border: 1px solid var(--border-color);
                box-shadow: 0 5px 15px var(--shadow-color);
                transition: background-color 0.3s, color 0.3s, border-color 0.3s;
            }}
            h1, h2, h3, h4 {{
                font-family: var(--font-heading);
                color: var(--header-color);
                margin-top: 2em;
                margin-bottom: 0.8em;
                line-height: 1.3;
                transition: color 0.3s;
            }}
            h1 {{
                text-align: center;
                font-size: 2.5em;
                border-bottom: 2px solid var(--accent-border-color);
                padding-bottom: 15px;
                margin-bottom: 1.5em;
                font-weight: 700;
                transition: border-color 0.3s;
                font-family: var(--font-ui) !important; /* Keep title in UI font */
            }}
            h2 {{
                font-size: 1.8em;
                font-weight: 700;
            }}
            h3 {{
                font-size: 1.4em;
                font-style: italic;
                font-weight: 400;
                color: var(--subheader-color);
            }}
            strong {{
                font-weight: bold;
                color: var(--header-color);
                transition: color 0.3s;
            }}
            a {{
                color: var(--link-color);
                text-decoration: none;
                transition: color 0.3s;
            }}
            a:hover {{
                color: var(--link-hover-color);
                text-decoration: underline;
            }}
            .top-controls {{
                display: flex;
                justify-content: space-between; /* Align items to opposite ends */
                align-items: center;
                margin-bottom: 20px;
                padding-bottom: 10px;
                border-bottom: 1px solid var(--border-color);
                transition: border-color 0.3s;
                font-family: var(--font-ui) !important; /* Keep controls in UI font */
            }}
            .back-button {{
                font-family: var(--font-ui) !important;
                font-size: 1em;
                color: var(--select-text-color); /* Add this line to match other nav elements */
                transition: color 0.3s; /* Add transition for smooth theme changes */
            }}
            
            /* Controls right side container */
            .controls-right {{
                display: flex;
                align-items: center;
                gap: 15px; /* Space between controls */
            }}

            /* ----------------------------------------------------
            6) Theme Specific Overrides & Effects
            ---------------------------------------------------- */

            /* Cozy Theme Specifics */
            body.theme-cozy {{
                /* Existing body styles are cozy defaults */
            }}
            body.theme-cozy.dark-mode {{
                box-shadow: 0 5px 20px var(--shadow-color);
                background-image: linear-gradient(to bottom, #211f21, #232022);
            }}
            body.theme-cozy.dark-mode .iteration-container {{
                box-shadow: 0 2px 8px #000000;
                border-color: var(--border-color);
            }}
            body.theme-cozy.dark-mode h1 {{
                text-shadow: 0 1px 2px #000000;
            }}
            body.theme-cozy.dark-mode .content-block {{
                border-color: var(--border-color);
            }}
            body.theme-cozy.dark-mode .prompt-text-display {{
                border-left: 3px solid var(--accent-border-color);
                background-color: #362e2b;
            }}
            body.theme-cozy.dark-mode .scores-container {{
                color: #b0a598;
            }}

            /* Modern Theme Specifics */
            body.theme-modern {{
                padding: 35px 45px;
            }}
            body.theme-modern h1 {{
                font-weight: 600;
                border-bottom-width: 1px;
            }}
            body.theme-modern h2 {{
                font-weight: 600;
            }}
            body.theme-modern h3 {{
                font-weight: 500; /* Use Medium for Inter/Modern */
                font-style: normal;
            }}
            body.theme-modern .iteration-header {{
                font-weight: 600; /* Besley */
            }}
            body.theme-modern .prompt-header {{
                font-weight: 500; /* Besley */
                font-style: normal;
            }}
            body.theme-modern .prompt-text-display {{
                border-left-width: 4px;
                border-radius: 3px;
                font-style: normal; /* Modern prompt less italic */
            }}
            body.theme-modern .judge-content {{
                border-style: solid;
                border-width: 1px;
            }}
            body.theme-modern strong {{
                font-weight: 600; /* Use SemiBold for Inter/Modern */
            }}


            /* ----------------------------------------------------
            7) Components / Containers (Theme Independent Styles)
            ---------------------------------------------------- */

            /* --- Selectors (Theme, Font) --- */
            .control-select-wrapper {{
                position: relative;
                display: inline-block;
            }}
            .control-select {{
                font-family: var(--font-ui) !important;
                font-size: 0.9em;
                color: var(--select-text-color);
                background-color: var(--select-bg);
                border: none;
                padding: 2px 5px 2px 18px; /* top/bottom, right, left (space for chevron) */
                margin: 0;
                cursor: pointer;
                appearance: none;
                -webkit-appearance: none;
                -moz-appearance: none;
                transition: color 0.3s;
                border-radius: 0; /* Ensure no default rounding */
            }}
            .control-select:focus {{
                outline: none;
            }}
            /* Custom Chevron */
            .control-select-wrapper::before {{ /* Changed from ::after */
                content: '▼';
                font-size: 0.6em;
                color: var(--select-chevron-color);
                position: absolute;
                left: 5px; /* Position on the left */
                top: 50%;
                transform: translateY(-50%);
                pointer-events: none;
                transition: color 0.3s;
            }}
            .control-select option {{
                background-color: var(--bg-color);
                color: var(--text-color);
                font-family: var(--font-ui); /* Ensure options use UI font */
            }}

            /* --- Dark Mode Toggle --- */
            .mode-toggle {{
                display: flex;
                align-items: center;
                font-family: var(--font-ui) !important;
            }}
            .mode-toggle .form-check-input {{ /* The hidden checkbox */
                opacity: 0;
                width: 0;
                height: 0;
                position: absolute;
            }}
            /* No visual switch span needed */
            .mode-toggle .form-check-label {{ /* The clickable text */
                font-family: var(--font-ui) !important;
                font-size: 0.9em;
                color: var(--subheader-color);
                cursor: pointer;
                transition: color 0.3s;
                user-select: none; /* Prevent text selection on click */
                padding: 2px 5px; /* Add some padding for easier clicking */
            }}
            .mode-toggle .form-check-label:hover {{
                color: var(--link-hover-color); /* Use link hover color for feedback */
            }}


            /* --- Report Content Containers --- */
            .iteration-container {{
                margin: 30px 0;
                border: 1px solid var(--border-color);
                border-radius: 4px;
                overflow: hidden;
                background-color: var(--container-bg);
                box-shadow: 0 2px 5px rgba(0,0,0,0.05);
                transition: background-color 0.3s, border-color 0.3s, box-shadow 0.3s;
            }}
            .iteration-header {{
                background: var(--iter-header-bg);
                padding: 12px 20px;
                cursor: pointer;
                position: relative;
                border-bottom: 1px solid var(--border-color);                
                font-size: 1.2em;
                font-weight: 700;
                color: var(--header-color);
                transition: background-color 0.3s, border-color 0.3s, color 0.3s;
            }}
            .iteration-header:hover {{
                background: var(--iter-header-hover-bg);
            }}
            .prompt-container {{
                border-top: 1px dashed var(--accent-border-color);
                transition: border-color 0.3s;
            }}
            .prompt-container:first-child {{
                border-top: none;
            }}
            .prompt-header {{
                background: var(--prompt-header-bg);
                padding: 10px 20px;
                cursor: pointer;
                font-size: 1.1em;
                font-weight: 400;
                color: var(--subheader-color);
                transition: background-color 0.3s, color 0.3s;
            }}
            .prompt-header:hover {{
                background: var(--prompt-header-hover-bg);
            }}
            .content-block {{
                padding: 15px 25px;
                border-top: 1px solid var(--border-color);
                background-color: var(--container-bg);
                transition: background-color 0.3s, border-color 0.3s;
            }}
            .response-content {{
                white-space: pre-wrap;
                font-family: var(--font-body);
                font-size: 1.05em;
                line-height: 1.7;
                margin-bottom: 15px;
                color: var(--text-color);
                transition: color 0.3s;
            }}
            .judge-content {{
                white-space: pre-wrap;
                font-family: var(--font-body);
                font-size: 1.0em;
                line-height: 1.6;
                background: var(--judge-bg);
                border: 1px dashed var(--judge-border);
                padding: 10px 15px;
                margin-top: 10px;
                border-radius: 3px;
                color: var(--judge-text);
                transition: background-color 0.3s, border-color 0.3s, color 0.3s;
            }}
            .prompt-text-display {{
                font-style: italic; /* Default italic */
                color: var(--subheader-color);
                margin-bottom: 1em;
                padding: 10px 15px;
                background-color: var(--prompt-display-bg);
                border-left: 3px solid var(--accent-border-color);
                white-space: pre-wrap;
                font-family: var(--font-body);
                transition: background-color 0.3s, border-color 0.3s, color 0.3s, font-style 0.3s;
            }}
            .collapsible-content {{
                display: none;
                padding: 0;
                background-color: var(--container-bg);
                transition: background-color 0.3s;
            }}
            .expanded {{
                display: block;
            }}
            .toggle-icon {{
                display: inline-block;
                width: 20px;
                text-align: center;
                font-weight: bold;
                margin-right: 8px;
                color: var(--toggle-icon-color);
                transition: color 0.3s;
            }}
            .scores-container {{
                margin-left: 20px;
                font-style: italic;
                color: #888;
                font-size: 0.9em;
            }}

            /* Make certain elements always use the UI font */
            h1, 
            .back-button,
            .control-select,
            .form-check-label,
            .top-controls {{
                font-family: var(--font-ui) !important; /* Override with UI font */
            }}

            h1.main-title, /* Add a class to the main title */
            .back-button,
            .control-select,
            .form-check-label,
            .top-controls {{
                font-family: var(--font-ui) !important; /* Override with UI font */
            }}

            /* Allow iteration and prompt headers to use selected font */
            .iteration-header,
            .prompt-header {{
                font-family: var(--font-body) !important;
            }}

            /* Mobile Responsiveness Adjustments */
            @media screen and (max-width: 768px) {{
    /* Body / Layout */
    body.theme-cozy,
    body.theme-modern {{
        max-width: 100%;
        margin: 10px 5px;
        padding: 15px 10px;
    }}

    /* Headings */
    body.theme-cozy h1,
    body.theme-modern h1 {{
        font-size: 1.8em;
        padding-bottom: 10px;
        margin-bottom: 1em;
    }}

    body.theme-cozy h2,
    body.theme-modern h2 {{
        font-size: 1.5em;
    }}

    body.theme-cozy h3,
    body.theme-modern h3 {{
        font-size: 1.2em;
    }}

    /* Iteration / Prompt headers */
    body.theme-cozy .iteration-header,
    body.theme-modern .iteration-header {{
        padding: 10px 12px;
    }}

    body.theme-cozy .prompt-header,
    body.theme-modern .prompt-header {{
        padding: 8px 12px;
    }}

    /* Content blocks */
    body.theme-cozy .content-block,
    body.theme-modern .content-block {{
        padding: 10px 12px;
    }}

    /* Top controls layout */
    body.theme-cozy .top-controls,
    body.theme-modern .top-controls {{
        flex-direction: column;
        align-items: flex-start;
        gap: 10px;
    }}

    body.theme-cozy .controls-right,
    body.theme-modern .controls-right {{
        width: 100%;
        justify-content: space-between;
    }}
}}



        </style>
    </head>
    <body class="theme-cozy">
        <div class="top-controls">
            <div class="nav-left">
                <a href="javascript:history.back()" class="back-button">← Back</a>
            </div>
            
            <div class="controls-right">
                <div class="control-select-wrapper">
                    <select id="themeSelector" class="control-select" aria-label="Select Theme">
                        <option value="cozy">Cozy</option>
                        <option value="modern">Modern</option>
                    </select>
                </div>

                <div class="control-select-wrapper">
                    <select id="fontSelector" class="control-select" aria-label="Select Font">
                        <option value="tiempos">Tiempos Text</option>
                        <option value="bookerly">Bookerly</option>
                        <option value="bitter">Bitter Pro</option>
                        <option value="roboto">Roboto</option>
                        <option value="inter">Inter</option>
                        <option value="source_sans">Source Sans 3</option>
                        <option value="open_sans">Open Sans</option>
                        <option value="fira_sans">Fira Sans</option>
                        <option value="besley">Besley</option>
                    </select>
                </div>

                <div class="mode-toggle">
                    <input class="form-check-input" type="checkbox" id="darkModeToggle">
                    <label class="form-check-label" for="darkModeToggle" id="toggleLabel">Light</label>
                </div>
            </div>
        </div>

        <h1 class="main-title">Sample Outputs: {display_model_name}</h1>
    """

    # --- Iteration and Prompt Loop (Identical HTML structure, CSS handles styling) ---
    for display_idx, (iter_idx, iter_data) in enumerate(sorted_iterations):
        is_first = display_idx == 0
        html_output += f"""
        <div class="iteration-container">
            <div class="iteration-header" onclick="toggleContent('iteration-{iter_idx}')">
                <span class="toggle-icon">{'−' if is_first else '+'}</span>
                Iteration {display_idx + 1} — Avg Score: {round(iter_data['score']*5, 1)}
            </div>
            <div id="iteration-{iter_idx}" class="collapsible-content {'expanded' if is_first else ''}">
        """
        prompt_data = iter_data["prompts"]
        prompt_items = []
        for prompt_id, task_data in prompt_data.items():
            if task_data.get("status") not in ["completed", "judged"]: continue
            prompt_text = task_data.get("base_prompt", "")
            if not prompt_text: continue

            prompt_category = "Unknown Category"
            prompt_title = f"Prompt {prompt_id}"
            if prompt_id in creative_prompts:
                prompt_info = creative_prompts[prompt_id]
                prompt_category = prompt_info.get("category", prompt_category)
                prompt_title = prompt_info.get("title", prompt_title)

            all_responses = []
            total_score = 0
            score_count = 0
            results_by_mod = task_data.get("results_by_modifier", {})
            for seed_mod, block in results_by_mod.items():
                response_text = block.get("model_response", "")
                raw_judge_text = block.get("raw_judge_text", "")
                j_scores = block.get("judge_scores", {})
                response_scores_list = []
                for metric, val in j_scores.items():
                    if isinstance(val, (int, float)):
                        score_val = (20 - val) if metric.lower() in neg_criteria else val
                        total_score += score_val
                        score_count += 1
                        response_scores_list.append(f"{metric}: {val}")
                all_responses.append({
                    "text": response_text, "judge_text": raw_judge_text, "scores": ", ".join(response_scores_list)
                })
            avg_score = round(total_score / score_count, 2) if score_count > 0 else 0
            prompt_items.append({
                "id": prompt_id, "prompt": prompt_text, "category": prompt_category,
                "title": prompt_title, "responses": all_responses, "avg_score": avg_score
            })

        def get_prompt_order(prompt_item):
            try: return PROMPTS_ORDER.index(prompt_item["id"])
            except ValueError: return len(PROMPTS_ORDER)
        prompt_items.sort(key=get_prompt_order)

        for pidx, item in enumerate(prompt_items):
            prompt_html_id = f"prompt-{iter_idx}-{item['id']}"
            html_output += f"""
            <div class="prompt-container">
                <div class="prompt-header" onclick="toggleContent('{prompt_html_id}')">
                    <span class="toggle-icon">+</span>
                    {item['category'].capitalize()}: {item['title']} — Score: {round(item['avg_score']*5, 1)}
                </div>
                <div id="{prompt_html_id}" class="collapsible-content">
                    <div class="content-block">
                        <div class="prompt-text-display">
<strong>Prompt:</strong><br>{item['prompt']}
                        </div>"""
            for ridx, response in enumerate(item["responses"]):
                html_output += f"""
                        <div class="response-content">
<strong>Model Output:</strong><br>{response['text']}
                        </div>"""
                if response["judge_text"]:
                    scores_display = f"<br><i>Scores: {response['scores']}</i>" if response['scores'] else ""
                    html_output += f"""
                        <div class="judge-content">
<strong>Judge Evaluation:</strong><br>{response['judge_text']} {scores_display}
                        </div>"""
                if ridx < len(item["responses"]) - 1:
                    html_output += "<hr style='border: none; border-top: 1px dotted var(--border-color); margin: 15px 0; transition: border-color 0.3s;'>"
            html_output += """
                    </div>
                </div>
            </div>"""
        html_output += """
            </div>
        </div>"""
    # --- End Iteration Loop ---

    # --- JavaScript for Toggling, Dark Mode, Themes, Fonts with Dynamic Font Loading ---
    html_output += """
        <script>
            // --- DOM Elements ---
            const body = document.body;
            const themeSelector = document.getElementById('themeSelector');
            const fontSelector = document.getElementById('fontSelector');
            const darkModeToggle = document.getElementById('darkModeToggle');
            const toggleLabel = document.getElementById('toggleLabel');

            // --- Constants ---
            const FONT_MAP = {
                'tiempos': "'Tiempos Text', Georgia, serif",
                'bookerly': "'Bookerly', Georgia, serif",
                'bitter': "'Bitter Pro', Georgia, serif",
                'roboto': "'Roboto', sans-serif",
                'inter': "'Inter', sans-serif",
                'source_sans': "'Source Sans 3', sans-serif",
                'open_sans': "'Open Sans', sans-serif",
                'fira_sans': "'Fira Sans', sans-serif",
                'besley': "'Besley', 'Merriweather', serif" // Primarily a heading font
            };
            
            // Font definitions with URLs for dynamic loading
            const FONT_DEFINITIONS = {
                'tiempos': {
                    family: 'Tiempos Text',
                    variants: [
                        { weight: 400, style: 'normal', url: 'fonts/tiempos_text/TiemposText-Regular.woff2' },
                        { weight: 400, style: 'italic', url: 'fonts/tiempos_text/TiemposText-RegularItalic.woff2' },
                        { weight: 700, style: 'normal', url: 'fonts/tiempos_text/TiemposText-Bold.woff2' }
                    ],
                    fallback: 'Georgia, serif'
                },
                'bookerly': {
                    family: 'Bookerly',
                    variants: [
                        { weight: 400, style: 'normal', url: 'fonts/bookerly/Bookerly.woff' },
                        { weight: 400, style: 'italic', url: 'fonts/bookerly/Bookerly Italic.woff' },
                        { weight: 700, style: 'normal', url: 'fonts/bookerly/Bookerly Bold.woff' }
                    ],
                    fallback: 'Georgia, serif'
                },
                'bitter': {
                    family: 'Bitter Pro',
                    variants: [
                        { weight: 400, style: 'normal', url: 'fonts/bitter_pro/BitterPro-Regular.ttf' },
                        { weight: 400, style: 'italic', url: 'fonts/bitter_pro/BitterPro-Italic.ttf' },
                        { weight: 700, style: 'normal', url: 'fonts/bitter_pro/BitterPro-Bold.ttf' }
                    ],
                    fallback: 'Georgia, serif'
                },
                'roboto': {
                    family: 'Roboto',
                    variants: [
                        { weight: 400, style: 'normal', url: 'fonts/roboto/static/Roboto-Regular.ttf' },
                        { weight: 400, style: 'italic', url: 'fonts/roboto/static/Roboto-Italic.ttf' },
                        { weight: 700, style: 'normal', url: 'fonts/roboto/static/Roboto-Bold.ttf' }
                    ],
                    fallback: 'sans-serif'
                },
                'inter': {
                    family: 'Inter',
                    variants: [
                        { weight: 400, style: 'normal', url: 'fonts/inter/static/Inter-Regular.ttf' },
                        { weight: 400, style: 'italic', url: 'fonts/inter/static/Inter-Italic.ttf' },
                        { weight: 700, style: 'normal', url: 'fonts/inter/static/Inter-Bold.ttf' }
                    ],
                    fallback: 'sans-serif'
                },
                'source_sans': {
                    family: 'Source Sans 3',
                    variants: [
                        { weight: 400, style: 'normal', url: 'fonts/source_sans_3/static/SourceSans3-Regular.ttf' },
                        { weight: 400, style: 'italic', url: 'fonts/source_sans_3/static/SourceSans3-Italic.ttf' },
                        { weight: 700, style: 'normal', url: 'fonts/source_sans_3/static/SourceSans3-Bold.ttf' }
                    ],
                    fallback: 'sans-serif'
                },
                'open_sans': {
                    family: 'Open Sans',
                    variants: [
                        { weight: 400, style: 'normal', url: 'fonts/open_sans/static/OpenSans-Regular.ttf' },
                        { weight: 400, style: 'italic', url: 'fonts/open_sans/static/OpenSans-Italic.ttf' },
                        { weight: 700, style: 'normal', url: 'fonts/open_sans/static/OpenSans-Bold.ttf' }
                    ],
                    fallback: 'sans-serif'
                },
                'fira_sans': {
                    family: 'Fira Sans',
                    variants: [
                        { weight: 400, style: 'normal', url: 'fonts/fira_sans/FiraSans-Regular.ttf' },
                        { weight: 400, style: 'italic', url: 'fonts/fira_sans/FiraSans-Italic.ttf' },
                        { weight: 700, style: 'normal', url: 'fonts/fira_sans/FiraSans-Bold.ttf' }
                    ],
                    fallback: 'sans-serif'
                },
                'besley': {
                    family: 'Besley',
                    variants: [
                        { weight: 400, style: 'normal', url: 'fonts/besley/Besley-VariableFont_wght.ttf' },
                        { weight: 400, style: 'italic', url: 'fonts/besley/Besley-Italic-VariableFont_wght.ttf' }
                    ],
                    fallback: 'serif'
                }
            };
            
            // Define which fonts are generally sans-serif for logic purposes
            const SANS_FONTS = ['roboto', 'inter', 'source_sans', 'open_sans', 'fira_sans'];

            const THEME_DEFAULT_FONTS = {
                'cozy': 'tiempos',
                'modern': 'inter' // Default body font for modern
            };
            const THEME_DEFAULT_HEAD_FONTS = {
                 'cozy': "'Lora', serif",
                 'modern': "'Besley', 'Merriweather', serif"
            };
            
            // Keep track of loaded fonts to avoid loading the same font multiple times
            const loadedFonts = new Set();

            // --- Dynamic Font Loading ---
            async function loadFontFace(fontKey) {
                if (loadedFonts.has(fontKey)) return; // Skip if already loaded
                
                const fontDef = FONT_DEFINITIONS[fontKey];
                if (!fontDef) {
                    console.warn(`Font definition not found for: ${fontKey}`);
                    return;
                }
                
                try {
                    const fontLoadPromises = fontDef.variants.map(variant => {
                        const fontFace = new FontFace(
                            fontDef.family,
                            `url(${variant.url})`,
                            {
                                weight: variant.weight,
                                style: variant.style
                            }
                        );
                        
                        return fontFace.load().then(loadedFont => {
                            document.fonts.add(loadedFont);
                            return loadedFont;
                        });
                    });
                    
                    await Promise.all(fontLoadPromises);
                    loadedFonts.add(fontKey);
                    console.log(`Loaded font: ${fontDef.family}`);
                } catch (err) {
                    console.error(`Error loading font ${fontDef.family}:`, err);
                    // Fall back silently - CSS will use fallback fonts
                }
            }

            // --- Content Toggling ---
            function toggleContent(id) {
                const element = document.getElementById(id);
                if (!element) return;
                const isExpanded = element.classList.contains('expanded');
                const header = element.previousElementSibling;
                const toggleIcon = header ? header.querySelector('.toggle-icon') : null;

                if (isExpanded) {
                    element.classList.remove('expanded');
                    if (toggleIcon) toggleIcon.textContent = '+';
                } else {
                    element.classList.add('expanded');
                    if (toggleIcon) toggleIcon.textContent = '−';
                }
            }

            // --- Shared settings with consistent keys ---
            const STORAGE_PREFIX = 'model_viewer_';
            const KEYS = {
                THEME: `${STORAGE_PREFIX}theme`,
                FONT: `${STORAGE_PREFIX}font`,
                DARK_MODE: `modelViewerDarkModeEnabled`
            };

            // Save settings with consistent keys
            function saveSettings(type, value) {
                localStorage.setItem(KEYS[type], value);
            }

            // --- Dark Mode ---
            function setDarkMode(isDark) {
                body.classList.toggle('dark-mode', isDark);
                toggleLabel.textContent = isDark ? 'Dark' : 'Light';
                if (darkModeToggle.checked !== isDark) {
                    darkModeToggle.checked = isDark;
                }
                saveSettings('DARK_MODE', isDark); // Use the shared key
            }


            // --- Theme Selection ---
            function applyTheme(themeName) {
                body.classList.remove('theme-cozy', 'theme-modern');
                body.classList.add(`theme-${themeName}`);
                if (themeSelector.value !== themeName) {
                    themeSelector.value = themeName;
                }
                saveSettings('THEME', themeName); // Use the shared key
                
                // Re-apply font based on theme's default or user's saved preference
                const savedFont = localStorage.getItem(KEYS.FONT);
                const defaultFont = THEME_DEFAULT_FONTS[themeName] || 'tiempos';
                applyFont(savedFont || defaultFont); // Apply saved or new default
            }

            // --- Font Selection ---
            async function applyFont(fontValue) {
                // First, load the font faces dynamically
                await loadFontFace(fontValue);
                
                const fontFamily = FONT_MAP[fontValue];
                const currentTheme = localStorage.getItem(KEYS.THEME) || 'cozy';
                let headingFontFamily = THEME_DEFAULT_HEAD_FONTS[currentTheme]; // Default heading for theme

                if (fontFamily) {
                    // Set body font - content text only, not UI elements
                    body.style.setProperty('--font-body', fontFamily);

                    // Determine appropriate heading font based on selected body font and theme
                    if (currentTheme === 'modern') {
                        headingFontFamily = THEME_DEFAULT_HEAD_FONTS['modern']; // Default to Besley for modern
                    } else { // Cozy theme
                        headingFontFamily = THEME_DEFAULT_HEAD_FONTS['cozy']; // Default to Lora for cozy
                    }
                    
                    // Special case: If Besley is explicitly selected, use it for heading regardless of theme
                    if (fontValue === 'besley') {
                        headingFontFamily = FONT_MAP['besley'];
                    }

                    // Set the content heading font - not UI elements
                    body.style.setProperty('--font-heading', headingFontFamily);

                    // Update the selector value if needed
                    if (fontSelector.value !== fontValue) {
                        fontSelector.value = fontValue;
                    }
                    
                    // Save with the shared key
                    saveSettings('FONT', fontValue);
                } else {
                    console.warn("Font value not found in FONT_MAP:", fontValue);
                    // Fallback to theme default
                    const theme = localStorage.getItem(KEYS.THEME) || 'cozy';
                    applyFont(THEME_DEFAULT_FONTS[theme]);
                }
            }

            // --- Event Listeners ---
            // Dark mode toggled by clicking the label (which triggers the hidden checkbox)
            darkModeToggle.addEventListener('change', function() {
                setDarkMode(this.checked);
            });

            themeSelector.addEventListener('change', function() {
                applyTheme(this.value);
            });

            fontSelector.addEventListener('change', function() {
                applyFont(this.value);
            });

            // --- Initial Settings Application ---
            async function applyInitialSettings() {
                // 1. Dark Mode - use shared key
                const savedDarkMode = localStorage.getItem(KEYS.DARK_MODE);
                const prefersDark = window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches;
                setDarkMode(savedDarkMode !== null ? (savedDarkMode === 'true') : prefersDark);

                // 2. Theme - use shared key
                const savedTheme = localStorage.getItem(KEYS.THEME) || 'cozy';
                applyTheme(savedTheme);

                // 3. Font - use shared key
                const savedFont = localStorage.getItem(KEYS.FONT) || THEME_DEFAULT_FONTS[savedTheme];
                await applyFont(savedFont);
                
                // 4. Font Selector (Ensure it matches the applied font)
                fontSelector.value = savedFont || THEME_DEFAULT_FONTS[savedTheme];
            }

            // Apply settings on load
            applyInitialSettings();

            // Optional: Listen for system theme changes ONLY if no preference is saved
            window.matchMedia('(prefers-color-scheme: dark)').addEventListener('change', event => {
                if (localStorage.getItem('darkModeEnabled') === null) {
                    setDarkMode(event.matches);
                }
            });

        </script>
    </body>
    </html>
    """

    # Save to file if requested
    if save_to_file:
        os.makedirs("results", exist_ok=True)
        sanitized_name = sanitize_model_name(get_updated_model_name(original_model_name))
        filename = f"results/{sanitized_name}.html"
        try:
            with open(filename, 'w', encoding='utf-8') as f:
                f.write(html_output)
            print(f"Report saved to {filename}")
        except IOError as e:
            print(f"Error saving report to {filename}: {e}")

    return HTML(html_output)


# --- Helper Functions (Identical to original) ---
def view_model_report(model_name, run_key=None, save_to_file=False):
    """Render a model's HTML report inline via IPython's ``display``.

    The report is built by ``generate_model_report``; ``run_key`` and
    ``save_to_file`` are forwarded to it unchanged, so passing
    ``save_to_file=True`` also writes the report to disk.
    """
    display(generate_model_report(model_name, run_key, save_to_file))

def save_model_report(model_name, run_key=None):
    """Build a model's HTML report and write it to disk without displaying it."""
    # Only the file-writing side effect is wanted here; the HTML object that
    # generate_model_report returns is intentionally dropped.
    generate_model_report(model_name, run_key, save_to_file=True)
    return None

def list_available_models():
    """Print the models in the ELO results file ranked by ELO and return their names.

    Loads ``ELO_RESULTS_FILE`` via ``load_json_file``. Entries whose ``"elo"``
    value is missing or not a number sort last and are displayed as ``N/A``.

    Returns:
        The original (un-substituted) model names in descending ELO order, or
        an empty list when no ELO data could be loaded.
    """
    elo_data = load_json_file(ELO_RESULTS_FILE)
    if not elo_data:
        print("No ELO data found.")
        return []

    def _rank_value(pair):
        # Missing / non-numeric ELO sinks to the bottom of the ranking.
        elo = pair[1]
        return elo if isinstance(elo, (int, float)) else -float('inf')

    print("Available models (sorted by ELO):")
    # Keep a missing ELO as None rather than defaulting to -inf: -inf is itself
    # a float, so it used to pass the numeric check below and print "-inf"
    # instead of the intended "N/A".
    models = [(name, model_data.get("elo")) for name, model_data in elo_data.items()]
    models.sort(key=_rank_value, reverse=True)

    for rank, (name, elo) in enumerate(models, 1):
        elo_display = f"{elo:.0f}" if isinstance(elo, (int, float)) else "N/A"
        # Show the substituted (canonical) model name to the user.
        print(f"{rank}. {get_updated_model_name(name)} (ELO: {elo_display})")
    return [name for name, _ in models]

def list_model_runs(model_name):
    """Print every run recorded for ``model_name`` and return the run keys.

    Runs are read from ``RUNS_FILE`` and matched on their ``"test_model"``
    field. The listing (and the returned keys) are ordered by run key.
    An empty list is returned when the runs file is empty/missing or when no
    run matches the model.
    """
    runs_data = load_json_file(RUNS_FILE)
    if not runs_data:
        print("No runs data found.")
        return []

    # Collect (run key, start time, status) for each run of this model.
    found = [
        (
            run_key,
            run_info.get("start_time", "Unknown Time"),
            run_info.get("status", "Unknown Status"),
        )
        for run_key, run_info in runs_data.items()
        if run_info.get("test_model") == model_name
    ]
    if not found:
        print(f"No runs found for model: {model_name}")
        return []

    # Header shows the substituted (canonical) model name.
    print(f"\nAvailable runs for {get_updated_model_name(model_name)}:")
    ordered = sorted(found, key=lambda entry: entry[0])
    for position, (run_key, started, state) in enumerate(ordered, 1):
        print(f"{position}. {run_key} (Started: {started}, Status: {state})")
    return [entry[0] for entry in ordered]


import html # Import the html module for escaping

# MODELS_TO_IGNORE and get_updated_model_name are defined near the top of this file.

# Section layout for the slop profile, in output order:
# (data key, heading, max items, item text field, span CSS class,
#  noun used in the empty-state message, whether to append the frequency)
_SLOP_SECTIONS = (
    ('top_repetitive_words', 'Top Repetitive Words', 50,
     'word', 'slop-word-item', 'repetitive words', False),
    ('top_multi_prompt_bigrams', 'Top Bigrams', 30,
     'ngram', 'slop-ngram-item', 'bigrams', True),
    ('top_multi_prompt_trigrams', 'Top Trigrams', 30,
     'ngram', 'slop-ngram-item', 'trigrams', True),
)


def _slop_profile_sort_key(entry):
    """Rank a (model_name, data) pair by normalized ELO, then raw ELO, else -inf."""
    metrics = entry[1]
    for field in ("normalized_elo", "elo"):
        value = metrics.get(field)
        if isinstance(value, (int, float)):
            return value
    return -float('inf')


def _render_slop_section(heading, entries, limit, text_key, css_class,
                         empty_noun, with_frequency):
    """Render one heading plus its item spans (or an empty-state note) as HTML."""
    parts = [f"<h4>{heading}</h4>\n"]
    if not entries:
        parts.append(f"<p><i>No multi-prompt {empty_noun} found.</i></p>\n")
        return "".join(parts)

    spans = []
    for entry in entries[:limit]:
        raw_text = entry.get(text_key)
        # html.escape only accepts str; coerce so a None/numeric value in the
        # data degrades to readable text instead of raising.
        label = html.escape('N/A' if raw_text is None else str(raw_text))
        if with_frequency:
            label = f"{label} ({html.escape(str(entry.get('frequency', 0)))})"
        spans.append(f"<span class='{css_class}'>{label}</span>")

    parts.append("<div class='slop-section-items'>\n")  # Container for items
    parts.append(" ".join(spans))  # Items share one wrapping line
    parts.append("\n</div>\n")
    return "".join(parts)


def format_slop_profile_string(elo_data_with_metrics: Dict[str, Dict]) -> str:
    """
    Formats repetitive word and n-gram data into a single multi-line string
    with HTML formatting (items on single wrapping lines), delimited by model name,
    suitable for embedding in JS. Frequency counts are included for n-grams only.

    Each model contributes a ``##### <display name>`` delimiter line followed by
    three sections: up to 50 repetitive words, up to 30 bigrams and up to 30
    trigrams. Models are ordered by normalized ELO (falling back to raw ELO)
    descending, and models listed in MODELS_TO_IGNORE are skipped.

    Args:
        elo_data_with_metrics: The dictionary containing model data including
                               'top_repetitive_words', 'top_multi_prompt_bigrams',
                               and 'top_multi_prompt_trigrams'.

    Returns:
        A single multi-line string containing the formatted slop profiles for
        all models, with leading/trailing whitespace removed. Empty input
        yields an empty string.
    """
    ranked_models = sorted(
        elo_data_with_metrics.items(),
        key=_slop_profile_sort_key,
        reverse=True,
    )

    # Collect fragments and join once instead of repeated string concatenation.
    chunks = []
    for model_name, data in ranked_models:
        if model_name in MODELS_TO_IGNORE:
            continue

        # The delimiter line is plain text (not HTML); the section headings
        # below are the only markup emitted per model.
        chunks.append(f"##### {get_updated_model_name(model_name)}\n")
        for (data_key, heading, limit, text_key,
             css_class, empty_noun, with_frequency) in _SLOP_SECTIONS:
            chunks.append(
                _render_slop_section(
                    heading, data.get(data_key, []), limit, text_key,
                    css_class, empty_noun, with_frequency,
                )
            )
        chunks.append("\n")  # Blank line between models

    return "".join(chunks).strip()  # Remove leading/trailing whitespace


def calculate_and_print_metrics(save_updated_elo: bool = True, print_slop_profile: bool = True) -> None:
    """
    Aggregate per-model text metrics from the runs file, merge them into the
    ELO data, and print the results.

    Pipeline:
      1. Load RUNS_FILE and ELO_RESULTS_FILE with load_json_file; return early
         if the runs data is empty.
      2. Collect every non-blank "model_response" string from tasks whose
         status is "completed" or "judged", grouped by model and prompt id.
         Models listed in MODELS_TO_IGNORE are skipped.
      3. For each model compute the average response length (characters),
         vocab complexity and slop score over all of its responses. Only when
         the model has responses for at least two prompt ids, also extract the
         top bigrams/trigrams, a repetition score (the sum of those n-gram
         frequencies) and the top repetitive words. A metric whose calculation
         raises is stored as the string 'Error'.
      4. Merge the metrics into a copy of the ELO data, fill in defaults for
         models that have no metrics, add 'normalized_elo', and print a CSV
         table sorted by normalized ELO (falling back to raw ELO), descending.
      5. Optionally write the merged data to ELO_RESULTS_UPDATED_FILE and
         optionally print the string built by format_slop_profile_string.

    Args:
        save_updated_elo: When True, dump the merged data as JSON to
            ELO_RESULTS_UPDATED_FILE.
        print_slop_profile: When True, print the slop profile string between
            BEGIN/END marker lines.

    Returns:
        None. Results are reported on stdout and in the optional output file.
    """
    print("\nCalculating aggregated metrics and N-grams...")
    runs_data = load_json_file(RUNS_FILE)
    elo_data = load_json_file(ELO_RESULTS_FILE)

    if not runs_data:
        print(f"Runs data file ('{RUNS_FILE}') is empty or not found. Cannot calculate metrics.")
        return

    # Structure: { model_name: { prompt_id: [text1, text2, ...], ... }, ... }
    model_texts_by_prompt = defaultdict(lambda: defaultdict(list))
    print("Extracting text from runs (grouped by prompt)...")
    processed_runs = 0
    # Walk every run; creative_tasks is traversed as
    # { iteration: { prompt_id: task_data } } and each task's
    # "results_by_modifier" as { seed_modifier: block }.
    for run_key, run_data in runs_data.items():
        model_name = run_data.get("test_model")
        if model_name in MODELS_TO_IGNORE:
            continue
        if not model_name: continue
        creative_tasks = run_data.get("creative_tasks", {})
        if not creative_tasks: continue

        # A run only counts as processed if at least one response was kept.
        run_has_text = False
        for iter_idx, prompt_data in creative_tasks.items():
            for prompt_id, task_data in prompt_data.items():
                # Only finished tasks contribute text.
                if task_data.get("status") not in ["completed", "judged"]: continue
                results_by_mod = task_data.get("results_by_modifier", {})
                for seed_mod, block in results_by_mod.items():
                    response_text = block.get("model_response")
                    # Keep only non-blank string responses.
                    if isinstance(response_text, str) and response_text.strip():
                        model_texts_by_prompt[model_name][prompt_id].append(response_text)
                        run_has_text = True
        if run_has_text: processed_runs += 1

    print(f"Extracted text for {len(model_texts_by_prompt)} models from {processed_runs} runs.")
    if not model_texts_by_prompt:
        print("No model text found in any run. Cannot calculate metrics.")
        return

    print("Calculating metrics and extracting N-grams per model...")
    # Per-model results; the three n-gram/word dicts only get an entry when
    # something was actually found for that model.
    model_metrics = {}
    model_repetitive_words = {}
    model_top_bigrams = {}
    model_top_trigrams = {}

    # --- Iterate through models ---
    for model_name, prompts_data in model_texts_by_prompt.items():
        all_responses_flat = [] # For slop, complexity, avg_length
        texts_with_ids_list = [] # For repetition metrics
        repetition_score = 0.0  # Default score
        top_repetitive_words = [] # Default list
        top_bigrams = []        # Default list
        top_trigrams = []       # Default list

        # Basic check: Does the model have *any* data?
        if not prompts_data:
            print(f"Skipping {model_name}: No prompt data found.")
            continue

        print(f"\n  Processing {model_name} (Responses from {len(prompts_data)} prompts)...")

        # Check for multi-prompt data BEFORE calculating repetition/n-grams
        has_multi_prompt_data = len(prompts_data) >= 2

        # Populate lists needed for different metrics
        for prompt_id, texts in prompts_data.items():
            all_responses_flat.extend(texts)
            # Only add to list for repetition if multi-prompt data exists
            if has_multi_prompt_data:
                 for text in texts:
                     if isinstance(text, str) and text.strip(): # Ensure only valid text is added
                         texts_with_ids_list.append((text, prompt_id))

        # --- Calculate N-grams, N-gram Repetition Score, and Top Repetitive Words (only if multi-prompt data exists) ---
        # NOTE(review): these four resets repeat the defaults assigned at the
        # top of the loop body; nothing in between changes them.
        repetition_score = 0.0
        top_bigrams = []
        top_trigrams = []
        top_repetitive_words = []

        if has_multi_prompt_data and texts_with_ids_list:
            ngram_calculation_error = False
            # NOTE(review): word_extraction_error is set below but never read.
            word_extraction_error = False

            # --- Calculate N-grams and N-gram based Repetition Score ---
            print("      Calculating N-grams and N-gram Repetition Score (multi-prompt)...")
            try:
                # The n-gram helper receives the {prompt_id: [texts]} mapping directly.
                top_bigrams = get_multi_prompt_ngrams(prompts_data, n=2, top_k=40, min_prompt_ids=2)
                if top_bigrams:
                    model_top_bigrams[model_name] = top_bigrams
                    repetition_score += sum(freq for ngram, freq in top_bigrams)
                    print(f"        Found {len(top_bigrams)} top bigrams meeting criteria.")
                else:
                    print("        No bigrams met the multi-prompt criteria.")

                top_trigrams = get_multi_prompt_ngrams(prompts_data, n=3, top_k=40, min_prompt_ids=2)
                if top_trigrams:
                    model_top_trigrams[model_name] = top_trigrams
                    repetition_score += sum(freq for ngram, freq in top_trigrams)
                    print(f"        Found {len(top_trigrams)} top trigrams meeting criteria.")
                else:
                    print("        No trigrams met the multi-prompt criteria.")

            except NameError:
                # Presumably guards against the metrics import having failed
                # earlier in the file - TODO confirm.
                print("      ERROR: `get_multi_prompt_ngrams` function not found. Skipping N-gram calculation and score.")
                ngram_calculation_error = True
                repetition_score = 'Error'
            except Exception as e:
                # Any failure (bigram or trigram step) replaces the whole score
                # with 'Error'; bigrams already stored above are kept.
                print(f"      ERROR calculating N-grams for {model_name}: {e}")
                ngram_calculation_error = True
                repetition_score = 'Error'

            if not ngram_calculation_error:
                 print(f"        Calculated N-gram repetition score (sum of freqs): {repetition_score}")

            # --- Extract Top Repetitive Words (Independent of N-gram score) ---
            print("      Extracting top repetitive words (multi-prompt)...")
            try:
                # The word helper receives the flat [(text, prompt_id), ...] list.
                top_repetitive_words = get_top_repetitive_words(texts_with_ids_list, top_n=200, min_prompt_ids=2) # Get more initially
                if top_repetitive_words:
                     model_repetitive_words[model_name] = top_repetitive_words # Store all found
                     print(f"        Found {len(top_repetitive_words)} repetitive words meeting criteria.")
                else:
                     print("        No words met the multi-prompt repetitive word criteria.")
            except NameError:
                 print("      ERROR: `get_top_repetitive_words` function not found. Skipping word extraction.")
                 word_extraction_error = True
            except Exception as e:
                 print(f"      ERROR extracting repetitive words for {model_name}: {e}")
                 word_extraction_error = True

        elif not has_multi_prompt_data:
             print(f"      Skipping N-grams, Repetition Score, and Repetitive Words: Only 1 prompt ID found.")
        elif not texts_with_ids_list:
             print("      Skipping N-grams, Repetition Score, and Repetitive Words: No valid text entries found after filtering.")

        # --- Calculate Other Metrics ---
        print("      Calculating other metrics (length, vocab, slop)...")
        num_responses = len(all_responses_flat)
        if num_responses == 0:
            print(f"      Skipping length/vocab/slop metrics: No text after flattening.")
            avg_length = 0.0
            vocab_complexity = 0.0
            slop_score = 0.0
        else:
            # Average length is measured in characters per response.
            total_chars = sum(len(r) for r in all_responses_flat if isinstance(r, str))
            avg_length = round(total_chars / num_responses, 2)
            # Vocab complexity and slop are computed over one combined text.
            all_text_combined = "\n\n".join(r for r in all_responses_flat if isinstance(r, str))
            if not all_text_combined.strip():
                 print("      Warning: Combined text is empty after joining, cannot calculate vocab/slop.")
                 vocab_complexity = 0.0
                 slop_score = 0.0
            else:
                # Each metric fails independently and falls back to 'Error'.
                try:
                    vocab_complexity = calculate_complexity_index(all_text_combined)
                except Exception as e:
                    print(f"      ERROR calculating vocab complexity for {model_name}: {e}")
                    vocab_complexity = 'Error'
                try:
                    slop_score = calculate_slop_index_new(all_text_combined)
                except Exception as e:
                    print(f"      ERROR calculating slop score for {model_name}: {e}")
                    slop_score = 'Error'

        # --- Store Metrics ---
        # Numeric values are rounded to 4 places; anything else (e.g. 'Error',
        # or an infinite vocab complexity) is stored as its string form.
        model_metrics[model_name] = {
            'avg_length': avg_length,
            'vocab_complexity': round(vocab_complexity, 4) if isinstance(vocab_complexity, (int, float)) and vocab_complexity != float('inf') else str(vocab_complexity),
            'slop_score': round(slop_score, 4) if isinstance(slop_score, (int, float)) else str(slop_score),
            'repetition_score': round(repetition_score, 4) if isinstance(repetition_score, (int, float)) else str(repetition_score) # Store potentially updated score
        }

        # --- Print Summary for Model ---
        print(f"    Metrics - Avg Len: {avg_length:.0f}, Vocab K: {model_metrics[model_name]['vocab_complexity']}, "
              f"Slop: {model_metrics[model_name]['slop_score']}, Repetition (multi-prompt): {model_metrics[model_name]['repetition_score']}")

        if top_repetitive_words:
            filtered_top_words = top_repetitive_words[:10] # Limit printout in console
            print(f"    Top multi-prompt repetitive words: " + ", ".join([f"{word} ({score:.1f}x)" for word, score in filtered_top_words]))
        elif has_multi_prompt_data:
             print("    No words met the multi-prompt repetition criteria.")

        if top_bigrams:
            print("    Top multi-prompt Bigrams:")
            for bg, freq in top_bigrams[:5]: # Limit printout
                print(f"      - {' '.join(bg)} ({freq})")
        elif has_multi_prompt_data:
            print("    No bigrams met the multi-prompt criteria.")

        if top_trigrams:
            print("    Top multi-prompt Trigrams:")
            for tg, freq in top_trigrams[:5]: # Limit printout
                print(f"      - {' '.join(tg)} ({freq})")
        elif has_multi_prompt_data:
            print("    No trigrams met the multi-prompt criteria.")


    # --- Merging metrics with ELO data ---
    print("\nMerging metrics with ELO data...")
    # dict.copy() is shallow: the per-model dicts are shared with elo_data,
    # so the updates below also modify the entries of elo_data.
    updated_elo_data = elo_data.copy() if isinstance(elo_data, dict) else {}

    # Add calculated metrics
    for model_name, metrics in model_metrics.items():
        if model_name not in updated_elo_data:
            updated_elo_data[model_name] = {}
            print(f"  Note: Model '{model_name}' found in runs but not in ELO data. Added entry.")
        updated_elo_data[model_name].update(metrics)

    # Add repetitive words if found (Store in the format expected by format_slop_profile_string)
    for model_name, words_list in model_repetitive_words.items():
        if model_name in updated_elo_data:
             updated_elo_data[model_name]['top_repetitive_words'] = [
                 {"word": word, "score": float(score)}
                 for word, score in words_list # words_list is already [(word, score), ...]
             ]

    # Add N-grams if found (Store in the format expected by format_slop_profile_string)
    for model_name, ngrams_list in model_top_bigrams.items():
         if model_name in updated_elo_data:
             updated_elo_data[model_name]['top_multi_prompt_bigrams'] = [
                 {"ngram": ' '.join(ngram), "frequency": int(freq)}
                 for ngram, freq in ngrams_list # ngrams_list is already [(('w1','w2'), freq), ...]
             ]
    for model_name, ngrams_list in model_top_trigrams.items():
         if model_name in updated_elo_data:
             updated_elo_data[model_name]['top_multi_prompt_trigrams'] = [
                 {"ngram": ' '.join(ngram), "frequency": int(freq)}
                 for ngram, freq in ngrams_list # ngrams_list is already [(('w1','w2','w3'), freq), ...]
             ]

    # --- Set default values for models present in ELO but potentially missing metrics ---
    default_metrics = {
        'avg_length': 0.0, 'vocab_complexity': 'N/A', 'slop_score': 'N/A',
        'repetition_score': 0.0, 'top_repetitive_words': [],
        'top_multi_prompt_bigrams': [], 'top_multi_prompt_trigrams': []
    }
    # setdefault only fills keys that are absent, so computed metrics win.
    # NOTE(review): the list defaults are the same list objects for every
    # model that receives them.
    all_model_names = set(updated_elo_data.keys())
    for model_name in all_model_names:
        # Always present here: the names were just read from this dict.
        if model_name not in updated_elo_data: continue
        for key, default_value in default_metrics.items():
            updated_elo_data[model_name].setdefault(key, default_value)


    # --- Normalize ELO scores ---
    print("\nNormalizing ELO scores...")
    # Only models with a numeric 'elo' take part in the normalization.
    raw_elo_scores = {}
    for model_name, data in updated_elo_data.items():
        elo = data.get('elo')
        if isinstance(elo, (int, float)):
            raw_elo_scores[model_name] = elo

    # Anchor model -> target value passed to normalize_elo_scores.
    anchor_models = {
        'deepseek/deepseek-r1': 1500,
        'meta-llama/llama-3.2-1b-instruct': 200
    }

    normalized_scores = normalize_elo_scores(raw_elo_scores, anchor_models)

    normalized_count = 0
    for model_name, normalized_elo in normalized_scores.items():
        if model_name in updated_elo_data:
            updated_elo_data[model_name]['normalized_elo'] = round(normalized_elo, 1)
            normalized_count += 1

    print(f"  Normalized ELO scores for {normalized_count} models using anchor models.")


    # --- Print CSV Results ---
    print("\n--- Aggregated Metrics Results (CSV Format) ---")
    # Header for the rows printed below. The column labelled "elo_score"
    # carries the normalized ELO (see the row print at the end of the loop).
    print("model_name,elo_score,creative_writing_score,avg_length,vocab_complexity,slop_score,repetition_score")
    # Sort descending by normalized ELO; fall back to raw ELO, and place
    # models with neither numeric value last.
    sorted_models = sorted(
        updated_elo_data.items(),
        key=lambda item: (
            item[1].get("normalized_elo", -float('inf'))
            if isinstance(item[1].get("normalized_elo"), (int, float))
            else (
                item[1].get("elo", -float('inf'))
                if isinstance(item[1].get("elo"), (int, float))
                else -float('inf')
            )
        ),
        reverse=True
    )

    for model_name, data in sorted_models:
        if model_name in MODELS_TO_IGNORE:
            continue
        # Rows use the substituted display name, not the raw key.
        updated_name = get_updated_model_name(model_name)
        # Every field falls back to 'N/A' (or its string form) when it is
        # missing or not numeric.
        # NOTE(review): elo_display is computed but not included in the row.
        elo = data.get('elo', 'N/A')
        elo_display = f"{elo:.1f}" if isinstance(elo, (int, float)) else 'N/A'

        norm_elo = data.get('normalized_elo', 'N/A')
        norm_elo_display = f"{norm_elo:.1f}" if isinstance(norm_elo, (int, float)) else 'N/A'

        creative_score = data.get('creative_writing_rubric_score_agg', 'N/A')
        creative_score_display = f"{creative_score:.2f}" if isinstance(creative_score, (int, float)) else 'N/A'

        avg_len = data.get('avg_length', 'N/A')
        avg_len_display = f"{avg_len:.0f}" if isinstance(avg_len, (int, float)) else 'N/A'

        vocab = data.get('vocab_complexity', 'N/A')
        vocab_display = f"{float(vocab):.2f}" if isinstance(vocab, (int, float)) else str(vocab)

        slop = data.get('slop_score', 'N/A')
        slop_display = f"{float(slop):.2f}" if isinstance(slop, (int, float)) else str(slop)

        repetition = data.get('repetition_score', 'N/A')
        # Use .0f for integer display, handle non-numeric
        repetition_display = f"{float(repetition):.0f}" if isinstance(repetition, (int, float)) else str(repetition)

        # Quote the name only when it contains a comma; embedded double
        # quotes are not escaped.
        safe_model_name = f'"{updated_name}"' if ',' in updated_name else updated_name
        print(f"{safe_model_name},{norm_elo_display},{creative_score_display},{avg_len_display},{vocab_display},{slop_display},{repetition_display}")


    # --- Save Updated ELO Data ---
    if save_updated_elo:
        print(f"\nSaving updated ELO data with metrics (and N-grams) to {ELO_RESULTS_UPDATED_FILE}...")
        # Failures are reported on stdout rather than raised.
        try:
            with open(ELO_RESULTS_UPDATED_FILE, 'w', encoding='utf-8') as f:
                json.dump(updated_elo_data, f, indent=2, ensure_ascii=False)
            print("Save successful.")
        except IOError as e:
            print(f"Error saving updated ELO data to {ELO_RESULTS_UPDATED_FILE}: {e}")
        except TypeError as e:
             print(f"Error serializing updated ELO data to JSON: {e}. Check for non-serializable types.")


    # --- Generate and Print Slop Profile String ---
    if print_slop_profile:
        print("\n--- Generating Slop Profile String for JS ---")
        slop_profile_output = format_slop_profile_string(updated_elo_data)
        print("\n----- BEGIN SLOP PROFILE STRING -----")
        print(slop_profile_output)
        print("----- END SLOP PROFILE STRING -----\n")
        # Optionally save to a file as well
        # try:
        #     with open("slop_profiles.txt", "w", encoding="utf-8") as f:
        #         f.write(slop_profile_output)
        #     print("Slop profile string also saved to slop_profiles.txt")
        # except IOError as e:
        #     print(f"Error saving slop profile string to file: {e}")

def normalize_elo_scores(raw_scores, anchor_models=None):
    """
    Rescale raw ELO scores linearly so that two anchor models land on fixed
    target values.

    The first two anchors (in ``anchor_models`` order) that are present in
    ``raw_scores`` define the line ``normalized = raw * scale + shift``; any
    further anchors are ignored.

    Args:
        raw_scores (dict): Model name -> raw ELO score.
        anchor_models (dict, optional): Model name -> target score.
            Default: {'deepseek/deepseek-r1': 1500,
                      'meta-llama/llama-3.2-1b-instruct': 200}

    Returns:
        dict: Model name -> normalized ELO score. When fewer than two anchors
        are present in ``raw_scores`` a warning is printed and an unscaled
        copy of ``raw_scores`` is returned.
    """
    if anchor_models is None:
        anchor_models = {
            'deepseek/deepseek-r1': 1500,
            'meta-llama/llama-3.2-1b-instruct': 200
        }

    # Anchors we can actually use: those that have a raw score.
    usable = [(name, target) for name, target in anchor_models.items() if name in raw_scores]

    if len(usable) < 2:
        print(f"Warning: Not enough anchor models found in scores. "
              f"Found {len(usable)} of {len(anchor_models)}. "
              f"Returning raw scores.")
        return dict(raw_scores)

    (model_a, target_a), (model_b, target_b) = usable[0], usable[1]
    raw_a, raw_b = raw_scores[model_a], raw_scores[model_b]

    # Identical raw anchor scores would divide by zero; fall back to a pure shift.
    scale = 1.0 if raw_a == raw_b else (target_a - target_b) / (raw_a - raw_b)
    shift = target_a - (scale * raw_a)

    normalized_scores = {}
    for model, score in raw_scores.items():
        normalized_scores[model] = score * scale + shift
    return normalized_scores


# --- Main Execution Block ---
if __name__ == "__main__":
    # The "results" directory must exist before any report is saved.
    os.makedirs("results", exist_ok=True)

    # Step 1: list the available models and keep them for the report loop.
    print("--- Available Models ---")
    models = list_available_models()
    print("-" * 24)

    # Step 2: compute and print the aggregated metrics. Pass
    # save_updated_elo=True to also write ELO_RESULTS_UPDATED_FILE.
    calculate_and_print_metrics(save_updated_elo=False)
    # print("-" * 24)

    # Step 3: save an HTML report for every model that is not ignored.
    print("\nGenerating and saving HTML reports for all models...")
    if not models:
        print("\nNo models found in ELO data to generate reports for.")
    else:
        for model in (m for m in models if m not in MODELS_TO_IGNORE):
            print(f"Processing report for: {get_updated_model_name(model)}")
            try:
                save_model_report(model)
            except Exception as e:
                # One failing report must not stop the remaining ones.
                print(f"  ERROR generating report for {model}: {e}")
        print("\nFinished saving reports.")

    # Step 4 (disabled): view a report directly in IPython/Jupyter.
    # if models and 'IPython' in sys.modules:
    #     print(f"\nDisplaying report for {get_updated_model_name(models[0])} in IPython...")
    #     view_model_report(models[0]) # Display the first model's report
    # else:
    #      print("\nSkipping direct display (not in IPython or no models found).")

    print("\nScript finished.")

--- Available Models ---
Available models (sorted by ELO):
1. deepseek-ai/DeepSeek-R1 (ELO: 1620)
2. chatgpt-4o-latest-2025-03-27 (ELO: 1486)
3. deepseek-ai/DeepSeek-V3-0324 (ELO: 1479)
4. gemini-2.5-pro-exp-03-25 (ELO: 1399)
5. claude-3-5-sonnet-20241022 (ELO: 1373)
6. chatgpt-4o-latest-2025-01-29 (ELO: 1366)
7. RekaAI/reka-flash-3 (ELO: 1351)
8. qwen/qwq-32b (ELO: 1346)
9. claude-3-7-sonnet-20250219 (ELO: 1339)
10. google/gemma-3-27b-it (ELO: 1250)
11. gpt-4.5-preview (ELO: 1197)
12. CohereForAI/c4ai-command-a-03-2025 (ELO: 1138)
13. anthropic/claude-3.5-haiku-20241022 (ELO: 1132)
14. google/gemma-3-12b-it (ELO: 1118)
15. sam-paech/Darkest-muse-v1 (ELO: 1106)
16. gemini-2.0-flash-001 (ELO: 1099)
17. allura-org/Gemma-3-Glitter-12B (ELO: 1070)
18. google/gemma-3-4b-it (ELO: 1037)
19. ifable/gemma-2-Ifable-9B (ELO: 1003)
20. ToastyPigeon/Gemma-3-Starshine-12B (ELO: 813)
21. mistralai/Mistral-Nemo-Instruct-2407 (ELO: 803)
22. gpt-4o-mini (ELO: 796)
23. meta-llama/llama-3.1-405b-instruct 