# MACO-Style Multi-Agent Content Optimization (Paper-Aligned)

This notebook implements the full pipeline, including frozen corpus, evaluator with MIS/ISR/MIV, iterative optimization loop, analyst/editor agents, and hybrid selector.

## 0) Setup & config

In [27]:
from dotenv import load_dotenv
load_dotenv()

False

In [None]:
import os, json, time, hashlib, re, textwrap
from typing import List, Dict, Any, Tuple
from dataclasses import dataclass
from datetime import datetime
import sys

# NOTE: API keys and secrets are loaded from environment variables (see .env or your shell config)
# Example expected variables (DO NOT hard-code real values here):
#   GOOGLE_API_KEY="***"
#   LANGSMITH_API_KEY="***"
#   LANGSMITH_TRACING="true"      # optional
#   LANGSMITH_PROJECT="***"
#   LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
#   GENSEE_API_KEY="***"

# Model choices & constants
# One Gemini model name per agent role. MODEL_EVAL is also used by the selector
# and MODEL_ANALYST is also used to generate the evaluation queries.
MODEL_EVAL     = "gemini-2.5-flash"
MODEL_ANALYST  = "gemini-2.5-flash"
MODEL_EDITOR   = "gemini-2.5-flash-lite"
# Sampling temperature per role (query generation and the selector pass their
# own literal temperatures instead of these).
TEMPERATURE_EVAL    = 0.0
TEMPERATURE_ANALYST = 0.6
TEMPERATURE_EDITOR  = 0.1

N_QUERIES   = 3        # queries generated per document (suggested range 5–10; the paper uses 10)
MAX_CTX     = 3       # contexts per query, the paper uses 10
SUCCESS_TAU = 0.75     # ISR threshold: a per-query score >= this value counts as a success
N_ITERS     = 5       # optimization iterations; the paper uses 10
# NOTE(review): the N_ITERS comment originally also read "selector often picks ~"
# with the number missing — restore the intended value if it is known.
RANDOM_SEED = 42

# TODO: update the anchors, the paper uses [0,10]
# Score grid: raw evaluator scores are snapped to the nearest of these values.
ANCHORS = [0.00, 0.17, 0.33, 0.50, 0.67, 0.83, 1.00]
# Metric keys as named in the evaluator prompt: Citation Prominence, Attribution
# Accuracy, Faithfulness, Key Concepts, Semantic Contribution, Answer Dominance.
METRICS = ["CP","AA","FA","KC","SC","AD"]

# TODO: baseline-style labeling
# Optional: tag detection for edits (baseline-style labeling)
# (label, regex) pairs. The analyst step tries them in order, case-insensitively,
# against each proposed patch and keeps the first label that matches.
TAG_PATTERNS = [
    ("Statistics",    r"\b\d{1,3}(,\d{3})*(\.\d+)?\s?%|\b(?:million|billion|thousand)\b"),
    ("More Quotes",   r"[\"“][^\"”]{8,}[\"”]"),
    ("Citing Sources",r"\b(?:According to|Source:|cited by|as reported by)\b"),
    ("Technical Terms", r"\b(latency|throughput|gradient|API|OAuth|schema|vector|embedding|protocol|REST|GraphQL)\b"),
    ("Authoritative", r"\b(must|should|undoubtedly|certainly|we recommend)\b"),
    ("Fluent",        r"."),  # fallback: any edit without the above
]

# Reproducibility tweaks where applicable
# This seeds the stdlib `random` module only; RANDOM_SEED is not passed to the
# LLM clients created in this file.
import random
random.seed(RANDOM_SEED)


In [29]:
DEBUG = True

def log_heading(h: str):
    """Print a banner line for heading *h*. Does nothing unless DEBUG is on."""
    if not DEBUG:
        return
    bar = "=" * 8
    print("\n" + " ".join((bar, h, bar)))

def log_json(name: str, obj):
    """Print *obj* as indented JSON under a ``[name]`` label when DEBUG is on.

    If the object cannot be JSON-encoded, its ``str()`` form, cut to 2000
    characters, is printed instead.
    """
    if not DEBUG:
        return
    print(f"\n[{name}]")
    try:
        rendered = json.dumps(obj, ensure_ascii=False, indent=2)
    except Exception:
        rendered = str(obj)[:2000]
    print(rendered)

def log_info(message: str):
    """Print *message* prefixed with the current local time as [HH:MM:SS]."""
    stamp = f"{datetime.now():%H:%M:%S}"
    print(f"[{stamp}] {message}")


In [30]:
# ===== LOGGING SETUP =====
class TeeOutput:
    """File-like wrapper that mirrors everything written to it into a log file.

    Args:
        terminal: the wrapped stream (e.g. the original ``sys.stdout``).
        log_file: path of the log file; it is opened when the object is created.

    Attributes not defined here (``isatty``, ``encoding``, ``fileno`` ...) are
    looked up on the wrapped stream, so code that inspects ``sys.stdout`` keeps
    working after the redirect.
    """
    def __init__(self, terminal, log_file):
        self.terminal = terminal
        self.log_file = log_file
        self.file_handle = None
        self._open_file()

    def _open_file(self):
        """Open the log file for appending.

        Append mode is deliberate: several TeeOutput objects may share one
        path (one for stdout, one for stderr). With mode 'w' each handle
        truncates the file and then writes from its own offset 0, so the
        streams overwrite each other's output.
        """
        self.file_handle = open(self.log_file, 'a', encoding='utf-8')

    def write(self, message):
        """Write *message* to the wrapped stream and the log file; return its length."""
        # Write to terminal
        self.terminal.write(message)
        # Write to file (only if message is not empty)
        if message and self.file_handle:
            self.file_handle.write(message)
            # flush per write so the log stays current if the kernel dies
            self.file_handle.flush()
        # file-like write() is expected to report the number of characters written
        return len(message)

    def flush(self):
        """Flush the wrapped stream and, if still open, the log file."""
        self.terminal.flush()
        if self.file_handle:
            self.file_handle.flush()

    def close(self):
        """Close the log file only; the wrapped stream is left open."""
        if self.file_handle:
            self.file_handle.close()
            self.file_handle = None

    def __getattr__(self, name):
        """Delegate unknown public attributes to the wrapped stream."""
        # __getattr__ only runs when normal lookup fails. Private/dunder names are
        # not delegated, and `terminal` is read from __dict__ to avoid infinite
        # recursion on a half-initialised instance.
        terminal = self.__dict__.get("terminal")
        if terminal is None or name.startswith("_"):
            raise AttributeError(name)
        return getattr(terminal, name)

def _unwrap_tee(stream):
    """Close and peel off TeeOutput layers left on *stream* by an earlier setup."""
    # Matched by class name rather than isinstance: re-running the notebook cell
    # rebinds TeeOutput to a new class object, so tees created by the previous
    # run would not be instances of the new class.
    while type(stream).__name__ == "TeeOutput" and hasattr(stream, "terminal"):
        stream.close()
        stream = stream.terminal
    return stream

def setup_logging():
    """Mirror stdout and stderr into a timestamped file under ``logs/``.

    The function is safe to call repeatedly. Tees installed by a previous call
    are closed and removed first, so output is not duplicated and file handles
    are not leaked when the cell is re-run.

    Returns:
        The path of the log file (``logs/YYYY_MM_DD_HH_MM.txt``).
    """
    now = datetime.now()
    # Create logs directory if it doesn't exist
    log_dir = "logs"
    os.makedirs(log_dir, exist_ok=True)

    # Create filename: YYYY_MM_DD_HH_MM.txt (e.g., 2025_01_15_14_30.txt)
    log_filename = f"{now.year:04d}_{now.month:02d}_{now.day:02d}_{now.hour:02d}_{now.minute:02d}.txt"
    log_path = os.path.join(log_dir, log_filename)

    # Recover the real streams (drops tees from an earlier call, if any)
    original_stdout = _unwrap_tee(sys.stdout)
    original_stderr = _unwrap_tee(sys.stderr)

    # Redirect stdout and stderr; each TeeOutput opens the log file itself
    sys.stdout = TeeOutput(original_stdout, log_path)
    sys.stderr = TeeOutput(original_stderr, log_path)

    # Write header (this goes through the tee, so it reaches console and file once)
    print('='*80)
    print(f"MACO Pipeline Log - Started at {now.strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Log file: {log_path}")
    print('='*80 + '\n')

    return log_path

# Initialize logging as soon as this cell runs: from here on everything printed
# to stdout/stderr is also written to the file at LOG_FILE_PATH.
LOG_FILE_PATH = setup_logging()
print(f"[LOG] All output will be saved to: {LOG_FILE_PATH}\n")



## 1) LLM client (LangChain Google GenAI)

In [31]:
from langchain_google_genai import ChatGoogleGenerativeAI

def make_llm(model: str, temperature: float):
    """Create a ChatGoogleGenerativeAI client for *model* at *temperature*.

    Client-side retries are switched off (``max_retries=0``). No API key is
    passed explicitly.
    """
    client_options = {
        "model": model,
        "temperature": temperature,
        "max_retries": 0,
    }
    # relies on GOOGLE_API_KEY env var
    return ChatGoogleGenerativeAI(**client_options)

_FENCE_RE = re.compile(r"^```(?:json)?\s*|\s*```$", flags=re.S)

def _llm_text(out) -> str:
    """Return the reply text of an LLM result, with wrapping code fences removed."""
    content = getattr(out, "content", "")
    if isinstance(content, list):
        # Message content may be a list of parts (plain strings or
        # {"type": "text", "text": ...} dicts); keep the textual parts only.
        parts = []
        for part in content:
            if isinstance(part, str):
                parts.append(part)
            elif isinstance(part, dict) and isinstance(part.get("text"), str):
                parts.append(part["text"])
        content = "".join(parts)
    text = content or str(out)
    return _FENCE_RE.sub("", str(text).strip())

def _parse_json_object(text: str):
    """Return the JSON object found in *text* as a dict, or None if there is none."""
    candidates = [text]
    # Second chance: the outermost {...} span, for replies that wrap the JSON in prose.
    start, end = text.find("{"), text.rfind("}")
    if 0 <= start < end:
        candidates.append(text[start:end + 1])
    for candidate in candidates:
        try:
            parsed = json.loads(candidate)
        except ValueError:  # json.JSONDecodeError is a ValueError
            continue
        if isinstance(parsed, dict):
            return parsed
    return None

def call_llm_json(llm, system: str, user: str, retry: int = 1) -> Dict[str, Any]:
    """
    Call an LLM with system+user text and parse JSON output robustly.

    Args:
        llm: object with an ``invoke(messages)`` method whose result exposes
            the reply in a ``content`` attribute.
        system: system prompt.
        user: user prompt.
        retry: how many times to ask the model to reprint its reply as strict
            JSON when parsing fails (0 disables the nudge).

    Returns:
        The parsed JSON object. The result is always a dict: when no JSON
        object can be obtained (including valid JSON that is not an object,
        such as a bare list), ``{"__SCHEMA_ERROR__": raw_text}`` is returned
        with the last reply text.
    """
    text = _llm_text(llm.invoke([("system", system), ("human", user)]))
    parsed = _parse_json_object(text)

    attempts_left = max(int(retry), 0)
    while parsed is None and attempts_left > 0:
        attempts_left -= 1
        nudged = (
            "Your previous reply was not valid JSON. Reprint ONLY strict JSON, "
            f"no commentary. Original reply: {text}"
        )
        text = _llm_text(llm.invoke([("system", system), ("human", nudged)]))
        parsed = _parse_json_object(text)

    if parsed is None:
        return {"__SCHEMA_ERROR__": text}
    return parsed


## 2) Retrieval (Gensee AI)

In [32]:
import os
import requests
from typing import List

def gensee_ai_retrieve(query: str, max_results: int = 3) -> List[str]:
    """
    Retrieves context snippets using the Gensee AI Platform API.

    Args:
        query: search query sent to the API.
        max_results: number of results requested, and the cap on the returned list.

    Returns:
        The non-empty string values found under ``content`` in the items of
        the response's ``search_response`` list, at most ``max_results`` of them.

    Notes:
        - The bearer token is read from the `GENSEE_API_KEY` environment variable.
        - Returns an empty list (after printing a warning) if the key is
          missing, the request fails, or the response is not the expected JSON.
    """
    api_key = os.getenv("GENSEE_API_KEY")
    if not api_key:
        print("[WARN] Missing GENSEE_API_KEY environment variable — returning empty list.")
        return []

    url = 'https://platform.gensee.ai/tool/search'
    payload = {
        'query': query,
        'max_results': max_results
    }
    headers = {
        'Content-Type': 'application/json',
        'Authorization': f'Bearer {api_key}'  # Dynamically load the key from env
    }

    try:
        response = requests.post(url, json=payload, headers=headers, timeout=60)
        response.raise_for_status()  # Raise an exception for bad statuses (401, 403, 500, etc.)
        body = response.json()
    except requests.exceptions.RequestException as e:
        print(f"[WARN] Gensee AI request failed: {e}")
        return []
    except ValueError as e:
        # some requests versions raise a plain ValueError for a non-JSON body
        print(f"[WARN] Gensee AI returned a non-JSON response: {e}")
        return []

    # Results are expected as a list of objects under 'search_response'
    results = body.get("search_response", []) if isinstance(body, dict) else None
    if not isinstance(results, list):
        print("[WARN] Gensee AI response has an unexpected shape — returning empty list.")
        return []

    # The text snippet lives in each item's 'content' key; skip anything that
    # is not a non-empty string so callers really get a List[str].
    contexts = [
        item["content"]
        for item in results
        if isinstance(item, dict)
        and isinstance(item.get("content"), str)
        and item["content"]
    ]
    return contexts[:max_results]

## 3) Prompts (Query, Evaluator, Analyst, Editor, Selector)

In [33]:
# System prompt for query generation: asks for article-answerable queries across
# five intents and a strict-JSON reply of the form {"queries": [{"intent", "q"}, ...]}.
# NOTE(review): the doubled colon after "STRICT JSON" looks like a typo; it is
# left untouched because changing prompt text changes the model input.
PROMPT_QUERY_SYSTEM = """You generate user queries for evaluating ONE article.
Produce content-centric queries:
- Each query MUST be answerable using the article alone.
- Cover varied user intents: {definition, learn/explanation, how-to/guide, compare, list/examples}.
- Avoid brand bait, clickbait, or unanswerable questions.
Return your entire response in STRICT JSON::
{"queries":[
  {"intent":"definition","q":"..."},
  {"intent":"learn","q":"..."},
  {"intent":"how-to","q":"..."},
  {"intent":"compare","q":"..."},
  {"intent":"list","q":"..."}
]}
"""

def prompt_query_user(doc: str, n_queries: int = N_QUERIES) -> str:
    """Build the user message for query generation: the article, then the requested count."""
    article_part = f"[ARTICLE]\n{doc}"
    request_part = f"Return {n_queries} queries spread across the intents."
    return article_part + "\n\n" + request_part

# System prompt for the evaluator/judge: answer the query RAG-style, then score
# the candidate article on the six metrics (CP, AA, FA, KC, SC, AD) using the
# anchor grid, and reply in strict JSON with "answer", "scores" and "why".
PROMPT_EVAL_SYSTEM = """You are an evaluation judge. Given a candidate article and external contexts:
1) Answer the user query (RAG style).
2) Score the CANDIDATE ARTICLE on SIX dimensions in [0,1] using anchors {0,.17,.33,.5,.67,.83,1}:
   - CP (Citation Prominence): clear, prominent citation/attribution of the candidate article in the final answer.
   - AA (Attribution Accuracy): statements attributed to the article truly originate from it.
   - FA (Faithfulness): answer remains faithful to the article’s meaning (no distortions).
   - KC (Key Concepts): article covers essential concepts needed for this query.
   - SC (Semantic Contribution): article contributes unique/central meaning vs other contexts.
   - AD (Answer Dominance): overall share of answer content deriving from the article vs other contexts.
Rules:
- Judge ONLY the candidate article’s contribution; do not reward contexts.
- If the answer can be formed without the article, penalize SC and AD.
- If external contexts are absent or minimal relative to the answer, DO NOT award SC or AD above 0.33 unless you explicitly justify why the article itself supplies the necessary unique content.
- If the article is very short/sparse and lacks definitions/examples/comparisons needed by the query, reduce KC and FA accordingly.
Return your entire response in STRICT JSON:
{
 "answer": "...",
 "scores": {"CP":0.83,"AA":0.67,"FA":0.83,"KC":0.67,"SC":0.50,"AD":0.50},
 "why": {
   "CP":"...", "AA":"...", "FA":"...", "KC":"...", "SC":"...", "AD":"..."
 }
}
"""

def prompt_eval_user(query: str, doc: str, contexts: List[str]) -> str:
    """Build the evaluator's user message from the query, the article and the contexts.

    At most MAX_CTX contexts are included, separated by ``---`` lines; when
    there are none, a fixed placeholder is used instead.
    """
    if contexts:
        context_block = "\n---\n".join(contexts[:MAX_CTX])
    else:
        context_block = "(no external contexts)"
    sections = (
        f"[QUERY]\n{query}",
        f"[CANDIDATE_ARTICLE]\n{doc}",
        f"[CONTEXTS]\n{context_block}",
    )
    return "\n\n".join(sections)

# System prompt for the analyst: pick the weakest metric and propose up to three
# edits, each with target_metric, reason, location_hint, operation and patch,
# returned as strict JSON {"edits": [...]}.
PROMPT_ANALYST_SYSTEM = """You propose targeted edits to improve the article’s weakest metrics.
Inputs: (1) article, (2) per-query scores with brief rationales, (3) aggregate MIS/ISR/MIV.
Find the single weakest metric by MIS; break ties by high MIV and low ISR.
Propose up to 3 precise edits. For EACH edit include:
- target_metric: one of {CP,AA,FA,KC,SC,AD}
- reason: ≤2 sentences
- location_hint: exact anchor text or section title
- operation: one of {"insert_after","replace_span","append_section","delete_span","merge_sections"}
- patch: exact text to insert/replace (≤180 words)
Return your entire response in STRICT JSON:: {"edits":[{...}, {...}]}
"""

def prompt_analyst_user(doc: str, per_query: List[Dict[str, Any]], agg: Dict[str, Any]) -> str:
    """Serialize the analyst's inputs (article, per-query results, aggregate) as one JSON string."""
    analyst_input = dict(article=doc, per_query=per_query, aggregate=agg)
    # ensure_ascii=False keeps non-ASCII article text readable in the prompt
    return json.dumps(analyst_input, ensure_ascii=False)

# System prompt for the LLM editor: apply exactly one edit and reply with the
# full revised article as plain text (no JSON, no commentary).
PROMPT_EDITOR_SYSTEM = """Apply ONE provided edit to the article faithfully. 
Do NOT rewrite unrelated text. If location_hint not found, place patch in the nearest logical spot.
Return the FULL revised article only. No explanations.
"""

def prompt_editor_user(doc: str, json_edit: Dict[str, Any]) -> str:
    """Serialize the article and the single edit to apply as one JSON string."""
    editor_input = dict(article=doc, edit=json_edit)
    return json.dumps(editor_input, ensure_ascii=False)

# System prompt for the LLM selector: choose among candidate versions using the
# same scalar as score_scalar() (sum of MIS minus 0.2 * sum of MIV) and reply
# with strict JSON {"winner_index": k, "reason": ...}.
PROMPT_SELECTOR_SYSTEM = """You are a selector comparing multiple article versions evaluated on the SAME query+context corpus.
Given MIS, ISR, MIV per version, pick the version that maximizes:
score = sum(MIS[m] for m in [CP,AA,FA,KC,SC,AD]) - 0.2 * sum(MIV[m] for m in [CP,AA,FA,KC,SC,AD]).
Return your entire response in STRICT JSON:: {"winner_index": k, "reason":"≤2 sentences"}
"""

def prompt_selector_user(history_summary: List[Dict[str, Any]]) -> str:
    """Serialize the candidate summaries for the selector as one JSON string.

    Each summary is expected to look like ``{"idx": i, "agg": {...}, "snippet": "..."}``.
    """
    selector_input = dict(candidates=history_summary)
    return json.dumps(selector_input, ensure_ascii=False)


## 4) Query generation + frozen corpus

In [34]:
# Corpus (build once, then freeze) 
def generate_queries_from_doc(doc_text: str, n_queries: int = N_QUERIES) -> List[str]:
    """Ask the LLM for evaluation queries about *doc_text*.

    Args:
        doc_text: the article the queries must be answerable from.
        n_queries: maximum number of queries to return.

    Returns:
        Up to ``n_queries`` distinct, non-empty query strings. If the LLM reply
        cannot be parsed or contains no usable query, up to five generic
        fallback queries are returned instead.
    """
    llm = make_llm(MODEL_ANALYST, temperature=0.3)  # tiny diversity, still on-topic
    payload = call_llm_json(llm, PROMPT_QUERY_SYSTEM, prompt_query_user(doc_text, n_queries))

    # Tolerate malformed replies: a non-dict payload, "queries": null, or items
    # that are bare strings instead of {"intent": ..., "q": ...} objects.
    raw_items = payload.get("queries") if isinstance(payload, dict) else None
    if not isinstance(raw_items, list):
        raw_items = []
    candidates = []
    for item in raw_items:
        text = item.get("q") if isinstance(item, dict) else item
        if isinstance(text, str) and text.strip():
            candidates.append(text.strip())
    # dedupe (keeping first occurrence order), then cap
    uniq = list(dict.fromkeys(candidates))[:n_queries]

    if not uniq:
        # very robust fallback: generic queries (at most five are available)
        base = [
            "Give a concise definition.",
            "Explain the key benefits.",
            "Provide a simple example.",
            "Compare it with an alternative.",
            "Give a short step-by-step guide."
        ]
        uniq = [f"{q} (based on the article above)" for q in base][:n_queries]

    if DEBUG:
        log_heading("Query Agent: generated queries")
        for i, q in enumerate(uniq):
            print(f"{i+1}. {q}")

    return uniq

def _retrieve_contexts(retriever, query: str, max_ctx: int) -> List[str]:
    """Call *retriever* for *query*, requesting *max_ctx* results when it supports that."""
    import inspect  # local import: only this helper needs it

    try:
        accepts_limit = "max_results" in inspect.signature(retriever).parameters
    except (TypeError, ValueError):  # signature not introspectable
        accepts_limit = False
    # Without this the retriever's own default result count would silently cap
    # the number of contexts, whatever max_ctx is set to.
    raw = retriever(query, max_results=max_ctx) if accepts_limit else retriever(query)
    return list(raw or [])[:max_ctx]

def build_corpus_for_doc(doc_text: str, retriever=gensee_ai_retrieve,
                         n_queries=N_QUERIES, max_ctx=MAX_CTX) -> Dict[str, Any]:
    """Generate queries for *doc_text*, retrieve contexts, and freeze them to disk.

    Args:
        doc_text: the article to build the evaluation corpus for.
        retriever: callable ``retriever(query)`` returning a list of context
            strings; if it has a ``max_results`` parameter, ``max_ctx`` is passed to it.
        n_queries: number of queries to generate.
        max_ctx: maximum contexts kept per query.

    Returns:
        ``{"queries": [{"q": ..., "ctx": [...]}, ...], "path": <json file>}``.
        Only queries with at least two distinct contexts are kept.

    Raises:
        RuntimeError: if fewer than two queries have at least two contexts.
    """
    queries = generate_queries_from_doc(doc_text, n_queries=n_queries)
    pairs = []
    for q in queries:
        try:
            ctxs = _retrieve_contexts(retriever, q, max_ctx)
        except Exception as e:
            # retrieval is best-effort, but a failure should be visible in the log
            print(f"[WARN] Retrieval failed for query {q!r}: {e}")
            ctxs = []
        # keep only queries with at least 2 contexts (so the judge can compare)
        cleaned = []
        for c in ctxs:
            if not isinstance(c, str):
                continue
            c = re.sub(r"\s+", " ", c.strip())  # collapse whitespace runs
            if c and c not in cleaned:
                cleaned.append(c)
        if len(cleaned) >= 2:
            pairs.append({"q": q, "ctx": cleaned})
    if DEBUG:
        log_heading("Retrieval: per-query context counts")
        for p in pairs:
            print(f"- {p['q'][:80]}...  | ctx={len(p['ctx'])}")
        log_heading("Retrieved Contexts (Full Content)")
        for i, p in enumerate(pairs):
            print(f"\n--- Query {i+1}: {p['q']} ---")
            for j, ctx in enumerate(p['ctx']):
                print(f"\n[Context {j+1}]")
                print(ctx[:500] + ("..." if len(ctx) > 500 else ""))

    # require minimum coverage
    if len(pairs) < 2:
        raise RuntimeError(f"Corpus too small ({len(pairs)} with >=2 contexts). "
                           f"Set GENSEE_API_KEY and retry, or reduce filters.")
    # md5 is used only to derive a short, stable file name from the document
    key = hashlib.md5(doc_text.encode()).hexdigest()[:10]
    path = f"corpus_{key}.json"
    # ensure_ascii=False writes raw non-ASCII characters, so the encoding must be
    # explicit instead of depending on the platform's locale default.
    with open(path, "w", encoding="utf-8") as f:
        json.dump({"queries": pairs, "created_at": time.time()}, f, ensure_ascii=False, indent=2)
    return {"queries": pairs, "path": path}


def load_corpus(path: str) -> Dict[str, Any]:
    """Load a corpus file written by build_corpus_for_doc.

    Returns the stored JSON object (keys ``queries`` and ``created_at``).
    The file is read as UTF-8, matching how it is written.
    """
    with open(path, encoding="utf-8") as f:
        return json.load(f)


## 5) Evaluator (per-query + MIS/ISR/MIV)

In [35]:
import math
import numpy as np

def _nearest_anchor(x: float) -> float:
    """Snap a raw score onto the nearest value in ANCHORS.

    Args:
        x: a number, or anything ``float()`` accepts (e.g. the string "0.5").

    Returns:
        The closest anchor. ``None``, values that cannot be converted, and
        non-finite values (NaN, ±inf) all yield 0.0.
    """
    if x is None:
        return 0.0
    try:
        value = float(x)
    except (TypeError, ValueError, OverflowError):
        # narrow on purpose: a bare except would also swallow KeyboardInterrupt
        return 0.0
    if not math.isfinite(value):
        # distances to NaN/inf are not comparable; treat the score as invalid
        return 0.0
    return min(ANCHORS, key=lambda a: abs(a - value))

def evaluator_score(document: str, query: str, contexts: List[str]) -> Dict[str, Any]:
    """Have the judge LLM answer *query* and score *document* on every metric.

    Args:
        document: the candidate article.
        query: the user query to answer.
        contexts: external context snippets shown to the judge.

    Returns:
        ``{"query", "scores", "why", "answer"}`` where ``scores`` has one
        anchor-snapped value per entry of METRICS. Metrics the judge did not
        return, and every metric when the reply is unusable, are 0.0.
    """
    if DEBUG:
        log_heading("Evaluator: Query & Contexts")
        print(f"Query: {query}")
        print(f"\nNumber of contexts: {len(contexts)}")
        for i, ctx in enumerate(contexts):
            print(f"\n[Context {i+1}]")
            print(ctx[:500] + ("..." if len(ctx) > 500 else ""))

    llm = make_llm(MODEL_EVAL, TEMPERATURE_EVAL)
    payload = call_llm_json(llm, PROMPT_EVAL_SYSTEM, prompt_eval_user(query, document, contexts))
    if DEBUG:
        log_heading("Evaluator Output")
        log_json("payload", payload)

    if not isinstance(payload, dict) or "__SCHEMA_ERROR__" in payload:
        # Make the failure visible: otherwise these zeros are indistinguishable
        # from genuinely low scores once they are aggregated.
        print("[WARN] Evaluator reply was not a usable JSON object; "
              "all metrics for this query default to 0.0.")
        if not isinstance(payload, dict):
            payload = {}

    answer = payload.get("answer", "")
    raw_scores = payload.get("scores")
    if not isinstance(raw_scores, dict):
        raw_scores = {}
    why = payload.get("why") or {}
    # coerce to anchors & fill missing
    scores = {m: _nearest_anchor(raw_scores.get(m)) for m in METRICS}
    return {"query": query, "scores": scores, "why": why, "answer": answer}

def aggregate_scores(per_query_scores: List[Dict[str, Any]], tau: float = SUCCESS_TAU) -> Dict[str, Dict[str, float]]:
    """Aggregate per-query metric scores into MIS, ISR and MIV.

    Args:
        per_query_scores: items with a ``"scores"`` dict holding a value for
            every entry of METRICS.
        tau: success threshold; a score ``>= tau`` counts as a success for ISR.

    Returns:
        ``{"MIS": ..., "ISR": ..., "MIV": ...}``, each mapping metric name to a
        value rounded to 4 decimals: the mean score, the fraction of queries
        at or above ``tau``, and the population variance (ddof=0). With no
        per-query scores, every value is 0.0.
    """
    if not per_query_scores:
        # An empty array has no second axis to reduce over; return explicit
        # zeros instead of failing later inside zip() with a confusing TypeError.
        return {name: {m: 0.0 for m in METRICS} for name in ("MIS", "ISR", "MIV")}

    arr = np.array([[pq["scores"][m] for m in METRICS] for pq in per_query_scores], dtype=float)  # shape Qx6
    mis = dict(zip(METRICS, arr.mean(axis=0).round(4).tolist()))
    isr = dict(zip(METRICS, (arr >= tau).mean(axis=0).round(4).tolist()))
    miv = dict(zip(METRICS, arr.var(axis=0, ddof=0).round(4).tolist()))
    return {"MIS": mis, "ISR": isr, "MIV": miv}


## 6) Analyst (edits) + tag detection

In [36]:
def _tag_for_patch(patch: str):
    """Return the label of the first TAG_PATTERNS entry matching *patch*, or None."""
    for tag, pat in TAG_PATTERNS:
        if re.search(pat, patch, flags=re.I):
            return tag
    return None

def analyst_propose_edits(doc: str, per_query: List[Dict[str, Any]], agg: Dict[str, Any]) -> Dict[str, Any]:
    """Ask the analyst LLM for edits and label each one with a tag.

    Args:
        doc: current article text.
        per_query: per-query evaluator results.
        agg: aggregate MIS/ISR/MIV.

    Returns:
        The analyst payload with ``"edits"`` normalised to a list of dicts.
        Each edit whose ``patch`` is a string matching one of TAG_PATTERNS
        gets a ``"tag"`` key. If the reply is unusable, a single conservative
        fallback edit is returned (tagged like any other).
    """
    llm = make_llm(MODEL_ANALYST, TEMPERATURE_ANALYST)
    payload = call_llm_json(llm, PROMPT_ANALYST_SYSTEM, prompt_analyst_user(doc, per_query, agg))
    if DEBUG:
        log_heading("Analyst: proposed edits")
        log_json("edits", payload)

    if not isinstance(payload, dict) or "__SCHEMA_ERROR__" in payload:
        # conservative fallback: add benefits sentence (improves SC/KC)
        payload = {"edits": [{
            "target_metric": "SC",
            "reason": "Add explicit benefits to improve semantic contribution and sufficiency.",
            "location_hint": "After introduction",
            "operation": "insert_after",
            "patch": "Key benefits include clarity, coverage of essential concepts, and concrete examples that distinguish this article from generic sources."
        }]}

    # Normalise "edits": accept a single edit object, drop anything that is not
    # a dict, and treat null / other types as "no edits".
    raw_edits = payload.get("edits")
    if isinstance(raw_edits, dict):
        raw_edits = [raw_edits]
    edits = [e for e in raw_edits if isinstance(e, dict)] if isinstance(raw_edits, list) else []

    # auto-tag the proposed patches
    for e in edits:
        patch = e.get("patch")
        tag = _tag_for_patch(patch) if isinstance(patch, str) else None
        if tag is not None:
            e["tag"] = tag

    payload["edits"] = edits
    return payload


## 7) Editor (apply one edit)

In [37]:
def _apply_edit_locally(doc: str, edit: Dict[str, Any]) -> str:
    """Lightweight, deterministic local editor for simple ops before LLM.

    Args:
        doc: current article text.
        edit: dict with ``operation``, ``location_hint`` and ``patch``.

    Returns:
        The edited text, or *doc* unchanged when the edit cannot be applied
        locally (unknown operation, empty patch where one is needed, or a
        ``replace_span``/``delete_span`` hint that does not occur in *doc*).

    Operations:
        insert_after   - put the patch on its own line right after the first
                         occurrence of the hint; append at the end if the hint
                         is missing.
        replace_span   - replace the first occurrence of the hint with the patch.
        append_section - append the patch after a blank line.
        delete_span    - remove the first occurrence of the hint.
        merge_sections - collapse runs of 3+ newlines into one blank line.
    """
    op = edit.get("operation")
    # `or ""` also covers keys that are present but null
    hint = edit.get("location_hint") or ""
    patch = edit.get("patch") or ""
    if not isinstance(hint, str):
        hint = str(hint)
    if not isinstance(patch, str):
        patch = str(patch)
    patch = patch.strip()

    # delete_span and merge_sections never use the patch
    if not patch and op not in ("delete_span", "merge_sections"):
        return doc

    if op == "insert_after":
        idx = doc.find(hint) if hint else -1
        if idx < 0:
            # append near end
            return doc.rstrip() + "\n\n" + patch + "\n"
        cut = idx + len(hint)
        head, tail = doc[:cut], doc[cut:]
        # Always start the patch on a new line after the anchor. Reuse the
        # newline that already follows the anchor, if any, so that no extra
        # blank line is introduced.
        trailing = "" if tail.startswith("\n") else "\n"
        return head + "\n" + patch + trailing + tail

    if op == "replace_span":
        if hint and hint in doc:
            return doc.replace(hint, patch, 1)
        return doc  # fallback: no-op

    if op == "append_section":
        return doc.rstrip() + "\n\n" + patch + "\n"

    if op == "delete_span":
        if hint and hint in doc:
            return doc.replace(hint, "", 1)
        return doc

    if op == "merge_sections":
        # naive: remove duplicate consecutive blank lines (simplify structure)
        return re.sub(r"\n{3,}", "\n\n", doc)

    return doc

def editor_apply_edit(doc: str, chosen_edit: Dict[str, Any]) -> str:
    """
    First try a deterministic local application; if that leaves the document
    unchanged for an operation that should have changed it, fall back to the
    LLM editor.

    Args:
        doc: current article text.
        chosen_edit: the edit to apply (``operation``, ``location_hint``, ``patch``).

    Returns:
        The revised article. If the LLM editor returns no text, *doc* is
        returned unchanged rather than replaced by an empty or garbage article.
    """
    # Try local
    new_doc = _apply_edit_locally(doc, chosen_edit)
    if new_doc != doc or chosen_edit.get("operation") in ("append_section","merge_sections","delete_span"):
        return new_doc

    # Fallback to LLM editor for tougher cases
    llm = make_llm(MODEL_EDITOR, TEMPERATURE_EDITOR)
    out = llm.invoke([("system", PROMPT_EDITOR_SYSTEM),
                      ("human", prompt_editor_user(doc, chosen_edit))])

    content = getattr(out, "content", "")
    if isinstance(content, list):
        # message content may be a list of parts; keep the textual ones
        content = "".join(
            part if isinstance(part, str) else str(part.get("text", ""))
            for part in content
            if isinstance(part, (str, dict))
        )
    text = content.strip() if isinstance(content, str) else ""

    # Unwrap a reply that is entirely enclosed in one code fence
    fenced = re.match(r"^```[\w-]*\n(.*)\n```$", text, flags=re.S)
    if fenced:
        text = fenced.group(1).strip()

    if not text:
        # Never fall back to str(out): that is the message repr, not an article.
        print("[WARN] Editor LLM returned no text; keeping the document unchanged.")
        return doc
    return text


## 8) Optimize loop + hybrid selector

In [38]:
def _history_summary_for_selector(history: List[Tuple[str, List[Dict[str,Any]], Dict[str,Any]]]) -> List[Dict[str, Any]]:
    """Condense each (doc, per-query, aggregate) entry into a selector candidate.

    Every candidate carries its position in *history* (``idx``), the aggregate
    (``agg``) and a ``snippet``: the document itself, or its first 220
    characters followed by an ellipsis when it is longer.
    """
    max_chars = 220

    def _preview(text: str) -> str:
        return text if len(text) <= max_chars else text[:max_chars] + "…"

    return [
        {"idx": position, "agg": aggregate, "snippet": _preview(document)}
        for position, (document, _per_query, aggregate) in enumerate(history)
    ]

def score_scalar(agg: Dict[str, Dict[str, float]], lam: float = 0.2) -> float:
    """Collapse an aggregate into one number: sum of MIS minus ``lam`` times sum of MIV.

    Both sums run over METRICS; the result is rounded to 4 decimals.
    """
    mean_total = sum(agg["MIS"][m] for m in METRICS)
    variance_total = sum(agg["MIV"][m] for m in METRICS)
    penalised = mean_total - lam * variance_total
    return round(float(penalised), 4)

def select_best_version(history: List[Tuple[str, List[Dict[str,Any]], Dict[str,Any]]]) -> Dict[str, Any]:
    """Pick the best article version: rule-based shortlist, then LLM tie-breaker.

    Args:
        history: ``(doc, per_query_scores, aggregate)`` tuples, one per version.

    Returns:
        ``{"index", "doc", "agg", "score_scalar"}`` for the chosen version,
        where ``index`` is the position in *history*.

    Raises:
        ValueError: if *history* is empty.
    """
    if not history:
        raise ValueError("select_best_version() needs at least one evaluated version")

    # 1) rule-based ranking (stable sort: ties keep their history order)
    with_scores = [(i, score_scalar(agg)) for i, (_, _, agg) in enumerate(history)]
    with_scores.sort(key=lambda x: x[1], reverse=True)
    top = [i for i, _ in with_scores[:3]]
    best_idx = top[0]  # default: best scalar

    # 2) LLM selector tie-breaker among top-3 (pointless with a single candidate)
    if len(top) > 1:
        llm = make_llm(MODEL_EVAL, 0.0)
        summary = _history_summary_for_selector([history[i] for i in top])
        payload = call_llm_json(llm, PROMPT_SELECTOR_SYSTEM, prompt_selector_user(summary))
        if isinstance(payload, dict) and "__SCHEMA_ERROR__" not in payload:
            try:
                k = int(payload.get("winner_index", 0))
            except (TypeError, ValueError):
                k = 0
            # The candidates shown to the LLM are numbered 0..len(top)-1. An
            # index outside that range is not a valid answer, so keep the
            # rule-based winner instead of clamping to another candidate.
            if 0 <= k < len(top):
                best_idx = top[k]

    doc, perq, agg = history[best_idx]
    return {"index": best_idx, "doc": doc, "agg": agg, "score_scalar": score_scalar(agg)}

def optimize_doc(doc_text: str, corpus: Dict[str, Any], n_iters: int = N_ITERS):
    """Iteratively evaluate and edit a document against a frozen corpus.

    Each iteration scores the current document on every corpus query, records
    ``(doc, per_query_scores, aggregate)`` in the history, and then (except on
    the last iteration) asks the analyst for edits and applies one of them.

    Args:
        doc_text: the starting article.
        corpus: dict with ``"queries"``: a list of ``{"q": ..., "ctx": [...]}``.
        n_iters: maximum number of versions to evaluate.

    Returns:
        The history list; entry 0 is the original document. It holds fewer
        than ``n_iters`` entries if the analyst proposes no edits.
    """
    history = []
    D = doc_text
    for t in range(n_iters):
        # Evaluate on the frozen corpus
        per_query_scores = [
            evaluator_score(D, item["q"], item["ctx"]) for item in corpus["queries"]
        ]
        agg = aggregate_scores(per_query_scores, tau=SUCCESS_TAU)
        history.append((D, per_query_scores, agg))

        if t == n_iters - 1:
            # Documents are only scored at the top of the loop, so an edit made
            # now would never be evaluated or stored: skip the LLM calls.
            break

        # Analyze & choose an edit
        plan = analyst_propose_edits(D, per_query_scores, agg)
        edits = [e for e in (plan.get("edits") or []) if isinstance(e, dict)]
        if not edits:
            # nothing to do -> early stop
            break
        # Choose the edit most aligned with weakest metric (by MIS); on ties the
        # metric listed first in METRICS wins.
        mis = agg["MIS"]
        weakest = min(METRICS, key=lambda m: mis[m])
        chosen = next((e for e in edits if e.get("target_metric") == weakest), edits[0])
        if DEBUG:
            log_heading(f"ITER {t} — Chosen edit")
            log_json("chosen_edit", chosen)

        # Apply
        D = editor_apply_edit(D, chosen)

        if DEBUG:
            log_heading(f"ITER {t} — Editor Output (New Document)")
            print(D)
            print("="*80)

    return history


## 9) Run end-to-end (example)

In [39]:
# Example input article used for this end-to-end run.
SOURCE_DOC = """\
API (Application Programming Interface) is a set of rules and definitions that
allows applications to communicate with each other. Developers use APIs to access
data or functionality from external services without knowing their internal implementations.
"""

# 1) Build (or load) frozen corpus for this document
#    NOTE: this line always builds a fresh corpus; reusing a saved file would go
#    through load_corpus(path) instead.
corpus = build_corpus_for_doc(SOURCE_DOC)  # returns {"queries":[...], "path": ...}
print(f"Frozen corpus saved to: {corpus['path']} with {len(corpus['queries'])} queries.")

# 2) Iterate
#    hist is a list of (document, per-query scores, aggregate) tuples, one per
#    evaluated version; entry 0 is SOURCE_DOC itself.
hist = optimize_doc(SOURCE_DOC, corpus, n_iters=N_ITERS)

# 3) Select best version
best = select_best_version(hist)

# 4) Report
# One line per evaluated version: mean score per metric plus the scalar used for ranking.
print("\n=== Iteration summary (MIS per iter) ===")
for i, (_, _, agg) in enumerate(hist):
    mis_line = " ".join([f"{m}:{agg['MIS'][m]:.2f}" for m in METRICS])
    print(f"iter {i:02d} | {mis_line} | scalar={score_scalar(agg):.3f}")

print("\n=== Winner ===")
print(f"Iteration: {best['index']}, scalar={best['score_scalar']:.3f}")
print(best["agg"])
print("\n=== Selected Document ===\n")
print(best["doc"])
