<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/mck_agent_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
MCK-style agent loop with human-review decorator fixed to forward *args, **kwargs.

Focus:
- requires_human_review decorator accepts arbitrary kwargs (e.g., feedback).
- Agent.revise() calls mmr.adapt(..., feedback=feedback) without crashing.
- Lightweight, auditable prints to trace decisions and state transitions.

Run:
    python mck_agent.py
"""

from __future__ import annotations

from dataclasses import dataclass
from functools import wraps
from typing import Any, Dict, List, Optional, Tuple
import random
import time


# -------------------------------
# Decorators
# -------------------------------

def requires_human_review(if_score_below: float):
    """
    Build a decorator that prints a human-review notice for low scores.

    The wrapped callable must accept ``(self, kernel, reason, ...)``, where
    ``reason`` is the numeric score. Whenever ``reason`` is strictly below
    ``if_score_below`` a ``[HUMAN-REVIEW]`` line is printed; the call is then
    forwarded unchanged, including any extra positional or keyword arguments
    (e.g. ``feedback=``), and its result is returned.
    """
    def decorator(adapt_fn):
        @wraps(adapt_fn)
        def reviewed(self, kernel, reason, *args, **kwargs):
            below_threshold = reason < if_score_below
            if below_threshold:
                # Notice only: the call still goes through afterwards.
                print(f"[HUMAN-REVIEW] score={reason:.3f} < threshold={if_score_below:.3f} — flagging before adapt()")
            result = adapt_fn(self, kernel, reason, *args, **kwargs)
            return result
        return reviewed
    return decorator


# -------------------------------
# Data structures
# -------------------------------

@dataclass
class Feedback:
    """
    Peer feedback artifact that can be logged, audited, and consumed by adaptation.

    Instances are produced by ``peer_validate`` and passed to
    ``MemoryModelReviser.adapt`` through ``Agent.revise``.
    """
    # Name of the peer that produced the feedback.
    from_peer: str
    # Time step at which the feedback was issued.
    t: int
    # Human-readable remark; printed by the main loop.
    comment: str
    # Token the peer suggests incorporating; adapt() turns it into the "nudge".
    delta_hint: Optional[str] = None
    # Relative weight assigned by the peer; printed by the main loop.
    weight: float = 1.0


# -------------------------------
# Engines and components
# -------------------------------

class SimilarityEngine:
    """
    Token-set similarity between a theory and the ground truth.

    Both strings are lower-cased and split on whitespace; duplicates collapse
    because the tokens are compared as sets.
    """
    def test(self, theory: str, gt: str) -> Dict[str, Any]:
        """
        Return the Jaccard signals for ``theory`` against ``gt``.

        The result holds the unique-token counts of both inputs, the overlap
        size, the union size (floored at 1 so the ratio is always defined),
        and the resulting Jaccard ratio.
        """
        theory_tokens = {*theory.lower().split()}
        gt_tokens = {*gt.lower().split()}
        overlap = len(theory_tokens.intersection(gt_tokens))
        # An empty union would divide by zero; treat it as size 1.
        union_size = len(theory_tokens.union(gt_tokens)) or 1
        return dict(
            tokens_theory=len(theory_tokens),
            tokens_gt=len(gt_tokens),
            jaccard=overlap / union_size,
            overlap=overlap,
            union=union_size,
        )


class ScoringEngine:
    """
    Collapse similarity signals into a single scalar score.

    Currently the score is just the Jaccard value; ``theory`` and ``gt`` are
    accepted but not used.
    """
    def evaluate(self, theory: str, sim_res: Dict[str, Any], gt: str) -> float:
        """Return ``sim_res["jaccard"]`` as a float, or 0.0 when the key is absent."""
        jaccard = sim_res.get("jaccard", 0.0)
        return float(jaccard)


class TheoryGenerator:
    """
    Generates a new theory string based on an 'abstraction' that encodes current strategy.
    - temperature: exploration intensity (controls how many vocabulary tokens are sampled)
    - nudge: a hint or token to incorporate from feedback
    """
    def __init__(self, vocabulary: Optional[List[str]] = None):
        # A missing or empty vocabulary falls back to the built-in word list.
        self.vocab = vocabulary or [
            "the", "quick", "brown", "fox", "jumps", "over", "lazy", "dog",
            "smart", "agile", "beyond", "robust", "audit", "traceable", "reliable",
            "policy", "signal", "contract", "governed", "reviewed", "clear"
        ]

    def generate(self, abstraction: Dict[str, Any]) -> str:
        """
        Build a theory by interleaving sampled tokens into a fixed core sentence.

        Args:
            abstraction: Strategy dict. ``temperature`` (default 0.2) maps to a
                sample size between 3 and 9; ``nudge`` (optional non-empty
                string) is placed first among the sampled tokens.

        Returns:
            Space-joined tokens with duplicates removed, first occurrence kept.
        """
        temperature = float(abstraction.get("temperature", 0.2))
        nudge = abstraction.get("nudge")

        k = min(9, max(3, int(3 + temperature * 6)))
        # random.sample raises ValueError when k exceeds the population size,
        # which happens with a small caller-supplied vocabulary.
        k = min(k, len(self.vocab))
        tokens = random.sample(self.vocab, k=k)

        if isinstance(nudge, str) and nudge:
            # Bias by ensuring the nudge appears, and appears first.
            tokens = [nudge] + [tok for tok in tokens if tok != nudge]

        # Always ensure some core structure appears for scoring realism
        core = ["the", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"]

        # After every second core token, insert the next sampled token (if any).
        mix: List[str] = []
        token_idx = 0
        for i, tok in enumerate(core):
            mix.append(tok)
            if i % 2 == 1 and token_idx < len(tokens):
                mix.append(tokens[token_idx])
                token_idx += 1

        # Deduplicate while preserving order (dict keys keep insertion order).
        return " ".join(dict.fromkeys(mix))


class MemoryModelReviser:
    """
    Turns a score plus optional peer feedback into the agent's next generator
    abstraction, and keeps an in-memory log of every adaptation.
    """
    def __init__(self):
        # One dict per adapt() call, in call order.
        self.history: List[Dict[str, Any]] = []

    @requires_human_review(if_score_below=0.20)
    def adapt(self, kernel: "Agent", reason: float, feedback: Optional[Feedback] = None) -> None:
        """
        Update ``kernel.last_abstraction`` from the score ``reason`` and ``feedback``.

        A lower score yields a higher temperature (``1 - reason`` clamped to
        [0.05, 1.0]). The feedback's ``delta_hint``, when present and
        non-empty, becomes the nudge. The adaptation is appended to
        ``self.history`` and echoed as an ``[ADAPT]`` line.
        """
        explore = min(1.0, 1.0 - reason)
        temperature = max(0.05, explore)

        if feedback and feedback.delta_hint:
            nudge = feedback.delta_hint
        else:
            nudge = None

        # getattr tolerates feedback=None by falling back to None.
        peer = getattr(feedback, "from_peer", None)
        step = getattr(feedback, "t", None)

        kernel.last_abstraction = dict(
            temperature=temperature,
            nudge=nudge,
            last_score=reason,
            last_feedback_peer=peer,
            t=step,
        )

        record = dict(
            ts=time.time(),
            score=reason,
            temperature=temperature,
            nudge=nudge,
            peer=peer,
            t=step,
        )
        self.history.append(record)

        print(f"[ADAPT] score={reason:.3f} -> temperature={temperature:.2f} "
              f"nudge={repr(nudge)} peer={peer} t={step}")


# -------------------------------
# Agent
# -------------------------------

class Agent:
    """
    Runs one revise cycle per call:
      1) compute similarity signals against the ground truth
      2) reduce them to a score
      3) let the reviser adapt ``last_abstraction`` (the reviser's decorator
         prints the human-review notice for low scores)
      4) generate a fresh theory from the latest abstraction
    """
    def __init__(self, tg: TheoryGenerator, simi: SimilarityEngine, ste: ScoringEngine,
                 mmr: MemoryModelReviser, gt: str) -> None:
        self.tg, self.simi, self.ste, self.mmr = tg, simi, ste, mmr
        self.gt = gt
        # Written by mmr.adapt(); None until the first revision.
        self.last_abstraction: Optional[Dict[str, Any]] = None

    def revise(self, theory: str, feedback: Optional[Feedback]) -> Tuple[str, float, Dict[str, Any]]:
        """
        Score ``theory``, adapt, and regenerate.

        Returns ``(new_theory, score, audit)`` where ``score`` belongs to the
        incoming ``theory`` and ``audit`` records the similarity signals, the
        abstraction used, and both theory strings.
        """
        signals = self.simi.test(theory, self.gt)
        score = self.ste.evaluate(theory, signals, self.gt)

        # feedback travels as a keyword argument through the review decorator.
        self.mmr.adapt(self, reason=score, feedback=feedback)

        abstraction = self.last_abstraction or {}
        revised = self.tg.generate(abstraction)

        audit = dict(
            sim_res=signals,
            last_abstraction=self.last_abstraction,
            old_theory=theory,
            new_theory=revised,
        )
        return revised, score, audit


# -------------------------------
# Peer validation
# -------------------------------

def peer_validate(peer: str, theory: str, gt: str, t: int) -> Optional[Feedback]:
    """
    Produce one peer's feedback on ``theory`` relative to ``gt``.

    - When every ground-truth token already occurs in the theory, return an
      "aligned" feedback with no hint and weight 0.8.
    - Otherwise hint at one missing token, preferring the first of a fixed
      priority list and falling back to a random missing token. The weight
      starts high and tapers with ``t``, never dropping below 0.4.
    """
    present = set(theory.lower().split())
    missing = [tok for tok in gt.lower().split() if tok not in present]

    if missing:
        # Salient tokens are tried in this order before resorting to chance.
        preferred = ("quick", "brown", "fox", "jumps", "lazy", "dog")
        candidate = next((tok for tok in preferred if tok in missing), None)
        if candidate is None:
            candidate = random.choice(missing)

        return Feedback(
            from_peer=peer,
            t=t,
            comment=f"Missing token '{candidate}'. Consider incorporating it.",
            delta_hint=candidate,
            weight=max(0.4, 1.2 - 0.1 * t),
        )

    return Feedback(
        from_peer=peer,
        t=t,
        comment="Looks aligned; keep consolidating.",
        delta_hint=None,
        weight=0.8,
    )


# -------------------------------
# Main loop
# -------------------------------

def main():
    """
    Run the demo: five time steps, each with one revision per peer followed by
    a novelty/broadcast regeneration at a slightly raised temperature.
    """
    random.seed(42)  # fixed seed keeps the printed trace reproducible

    # Ground truth reference
    ground_truth = "the quick brown fox jumps over the lazy dog"

    # Components
    reviser = MemoryModelReviser()
    agent = Agent(
        tg=TheoryGenerator(),
        simi=SimilarityEngine(),
        ste=ScoringEngine(),
        mmr=reviser,
        gt=ground_truth,
    )

    # Initial theory is intentionally incomplete
    theory = "brown fox jumps beyond policy"
    peer_names = ["alpha", "beta", "gamma"]

    print("[INIT] gt:", ground_truth)
    print("[INIT] theory0:", theory)
    print("-" * 72)

    total_steps = 5
    for step in range(1, total_steps + 1):
        print(f"\n[STEP] t={step}")

        # Peer validation + revision, one pass per peer
        for name in peer_names:
            fb = peer_validate(name, theory, gt=ground_truth, t=step)
            if not fb:
                print(f"[PEER] {name}: no feedback")
                continue

            print(f"[PEER] {name}: {fb.comment} (weight={fb.weight:.2f}, t={fb.t}, hint={repr(fb.delta_hint)})")
            theory, _score, audit = agent.revise(theory, fb)

            sim = audit["sim_res"]
            print(f"[SCORE] jaccard={sim['jaccard']:.3f} overlap={sim['overlap']}/{sim['union']} -> new theory:")
            print("        ", theory)

        # Novelty and broadcast: warm the temperature by 0.05 (capped at 1.0)
        current = agent.last_abstraction or {}
        warmer = min(1.0, current.get("temperature", 0.2) + 0.05)
        agent.last_abstraction = {**current, "temperature": warmer}
        theory = agent.tg.generate(agent.last_abstraction)
        print(f"[NOVELTY] t={step} broadcast theory:")
        print("          ", theory)

    print("\n" + "-" * 72)
    print("[DONE] Final theory:", theory)
    print("[HISTORY] adapt events:", len(reviser.history))
    if reviser.history:
        last = reviser.history[-1]
        print(f"         last: score={last['score']:.3f} temp={last['temperature']:.2f} nudge={repr(last['nudge'])}")


# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
MCK-style agent loop with:
1) requires_human_review decorator that forwards *args, **kwargs
2) Lock-in mechanism: when theory fully matches GT, exploration is frozen
"""

from __future__ import annotations
from dataclasses import dataclass
from functools import wraps
from typing import Any, Dict, List, Optional, Tuple
import random
import time

# -------------------------------
# Decorators
# -------------------------------

def requires_human_review(if_score_below: float):
    """
    Return a decorator that announces low scores before delegating.

    The decorated callable is invoked as ``(self, kernel, reason, *args,
    **kwargs)``. If ``reason`` is strictly less than ``if_score_below`` a
    ``[HUMAN-REVIEW]`` line is printed first. All arguments are forwarded
    untouched and the callable's return value is passed back.
    """
    threshold = if_score_below

    def decorator(target):
        @wraps(target)
        def guarded(self, kernel, reason, *args, **kwargs):
            if not reason >= threshold:
                # Notice only: execution continues into the wrapped call.
                print(f"[HUMAN-REVIEW] score={reason:.3f} < threshold={threshold:.3f} — flagging before adapt()")
            return target(self, kernel, reason, *args, **kwargs)
        return guarded

    return decorator

# -------------------------------
# Data structures
# -------------------------------

@dataclass
class Feedback:
    """Peer feedback record created by ``peer_validate`` and consumed by ``adapt``."""
    # Name of the peer that produced the feedback.
    from_peer: str
    # Time step at which the feedback was issued.
    t: int
    # Human-readable remark; printed by the main loop.
    comment: str
    # Token the peer suggests incorporating; adapt() turns it into the "nudge".
    delta_hint: Optional[str] = None
    # Relative weight assigned by the peer; printed by the main loop.
    weight: float = 1.0

# -------------------------------
# Engines and components
# -------------------------------

class SimilarityEngine:
    """Jaccard similarity over lower-cased, whitespace-split token sets."""
    def test(self, theory: str, gt: str) -> Dict[str, Any]:
        """
        Compare ``theory`` with ``gt`` and return the similarity signals:
        unique-token counts, overlap size, union size (at least 1), and the
        Jaccard ratio ``overlap / union``.
        """
        theory_tokens = {*theory.lower().split()}
        gt_tokens = {*gt.lower().split()}
        overlap = len(theory_tokens.intersection(gt_tokens))
        # Floor the denominator at 1 so two empty inputs do not divide by zero.
        union_size = len(theory_tokens.union(gt_tokens)) or 1
        signals: Dict[str, Any] = {}
        signals["tokens_theory"] = len(theory_tokens)
        signals["tokens_gt"] = len(gt_tokens)
        signals["jaccard"] = overlap / union_size
        signals["overlap"] = overlap
        signals["union"] = union_size
        return signals

class ScoringEngine:
    """Reduces similarity signals to one scalar; at present just the Jaccard value."""
    def evaluate(self, theory: str, sim_res: Dict[str, Any], gt: str) -> float:
        """Return the ``"jaccard"`` entry of ``sim_res`` as a float (0.0 if missing)."""
        raw = sim_res.get("jaccard", 0.0)
        return float(raw)

class TheoryGenerator:
    """
    Produces theory strings by interleaving sampled vocabulary tokens into a
    fixed core sentence, then dropping repeated tokens.
    """
    def __init__(self, vocabulary: Optional[List[str]] = None):
        # A missing or empty vocabulary falls back to the built-in word list.
        self.vocab = vocabulary or [
            "the", "quick", "brown", "fox", "jumps", "over", "lazy", "dog",
            "smart", "agile", "beyond", "robust", "audit", "traceable", "reliable",
            "policy", "signal", "contract", "governed", "reviewed", "clear"
        ]

    def generate(self, abstraction: Dict[str, Any]) -> str:
        """
        Generate a theory from ``abstraction``.

        Args:
            abstraction: Strategy dict. ``temperature`` (default 0.2) maps to a
                sample size between 3 and 9; ``nudge`` (optional non-empty
                string) is placed first among the sampled tokens.

        Returns:
            Space-joined tokens with duplicates removed, first occurrence kept.
        """
        temperature = float(abstraction.get("temperature", 0.2))
        nudge = abstraction.get("nudge")

        k = min(9, max(3, int(3 + temperature * 6)))
        # random.sample raises ValueError when k exceeds the population size,
        # which happens with a small caller-supplied vocabulary.
        k = min(k, len(self.vocab))
        tokens = random.sample(self.vocab, k=k)
        if isinstance(nudge, str) and nudge:
            tokens = [nudge] + [tok for tok in tokens if tok != nudge]

        core = ["the", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"]

        # After every second core token, insert the next sampled token (if any).
        mix: List[str] = []
        token_idx = 0
        for i, tok in enumerate(core):
            mix.append(tok)
            if i % 2 == 1 and token_idx < len(tokens):
                mix.append(tokens[token_idx])
                token_idx += 1

        # Order-preserving dedupe: dict keys keep insertion order.
        return " ".join(dict.fromkeys(mix))

class MemoryModelReviser:
    """Maps score and feedback to the agent's next abstraction and logs each adaptation."""
    def __init__(self):
        # One dict per adapt() call, in call order.
        self.history: List[Dict[str, Any]] = []

    @requires_human_review(if_score_below=0.20)
    def adapt(self, kernel: "Agent", reason: float, feedback: Optional[Feedback] = None) -> None:
        """
        Overwrite ``kernel.last_abstraction`` based on the score ``reason``.

        Temperature is ``1 - reason`` clamped to [0.05, 1.0]; the nudge is the
        feedback's non-empty ``delta_hint``, else None. The event is appended
        to ``self.history`` and printed as an ``[ADAPT]`` line.
        """
        explore = min(1.0, 1.0 - reason)
        temperature = max(0.05, explore)

        nudge = None
        if feedback and feedback.delta_hint:
            nudge = feedback.delta_hint

        # getattr tolerates feedback=None by falling back to None.
        peer = getattr(feedback, "from_peer", None)
        step = getattr(feedback, "t", None)

        kernel.last_abstraction = dict(
            temperature=temperature,
            nudge=nudge,
            last_score=reason,
            last_feedback_peer=peer,
            t=step,
        )

        entry = dict(
            ts=time.time(),
            score=reason,
            temperature=temperature,
            nudge=nudge,
            peer=peer,
            t=step,
        )
        self.history.append(entry)

        print(f"[ADAPT] score={reason:.3f} -> temp={temperature:.2f} nudge={repr(nudge)} "
              f"peer={peer} t={step}")

# -------------------------------
# Agent
# -------------------------------

class Agent:
    """
    Scores a theory, then either locks in (theory equals the ground truth,
    ignoring case and surrounding whitespace) or adapts and regenerates.
    """
    def __init__(self, tg: TheoryGenerator, simi: SimilarityEngine, ste: ScoringEngine,
                 mmr: MemoryModelReviser, gt: str) -> None:
        self.tg, self.simi, self.ste, self.mmr = tg, simi, ste, mmr
        self.gt = gt
        # Written by mmr.adapt() or by the lock-in branch; None until then.
        self.last_abstraction: Optional[Dict[str, Any]] = None

    def revise(self, theory: str, feedback: Optional[Feedback]) -> Tuple[str, float, Dict[str, Any]]:
        """
        Return ``(theory_out, score, audit)`` for the incoming ``theory``.

        On an exact match the theory is returned unchanged, the abstraction is
        set to a near-zero temperature, and ``mmr.adapt`` is not called.
        Otherwise the reviser adapts and a new theory is generated.
        """
        sim_res = self.simi.test(theory, self.gt)
        score = self.ste.evaluate(theory, sim_res, self.gt)

        is_match = theory.strip().lower() == self.gt.strip().lower()
        audit = {"sim_res": sim_res, "locked_in": is_match}

        if not is_match:
            self.mmr.adapt(self, reason=score, feedback=feedback)
            regenerated = self.tg.generate(self.last_abstraction or {})
            return regenerated, score, audit

        # Lock-in: freeze exploration and keep the theory as given.
        self.last_abstraction = {"temperature": 0.01, "nudge": None, "last_score": score}
        print("[LOCK-IN] Perfect match detected — freezing exploration.")
        return theory, score, audit

# -------------------------------
# Peer validation
# -------------------------------

def peer_validate(peer: str, theory: str, gt: str, t: int) -> Optional[Feedback]:
    """
    Produce one peer's feedback on ``theory`` relative to ``gt``.

    With no ground-truth token missing, the feedback carries no hint and
    weight 0.8. Otherwise it hints at one missing token (first hit in a fixed
    priority list, else a random missing token) with a weight that tapers
    with ``t`` down to a floor of 0.4.
    """
    present = set(theory.lower().split())
    missing = [tok for tok in gt.lower().split() if tok not in present]

    if missing:
        preferred = ("quick", "brown", "fox", "jumps", "lazy", "dog")
        candidate = next((tok for tok in preferred if tok in missing), None)
        if candidate is None:
            # Only "the"/"over"-style tokens are missing; pick one at random.
            candidate = random.choice(missing)
        message = f"Missing token '{candidate}'. Consider incorporating it."
        return Feedback(peer, t, message, candidate, max(0.4, 1.2 - 0.1 * t))

    return Feedback(peer, t, "Looks aligned; keep consolidating.", None, 0.8)

# -------------------------------
# Main loop
# -------------------------------

def main():
    """
    Run the demo: five time steps of per-peer revision. After each step a
    novelty regeneration follows unless the theory equals the ground truth.
    """
    random.seed(42)  # fixed seed keeps the printed trace reproducible
    ground_truth = "the quick brown fox jumps over the lazy dog"

    reviser = MemoryModelReviser()
    agent = Agent(
        tg=TheoryGenerator(),
        simi=SimilarityEngine(),
        ste=ScoringEngine(),
        mmr=reviser,
        gt=ground_truth,
    )

    theory = "brown fox jumps beyond policy"
    peer_names = ["alpha", "beta", "gamma"]

    print("[INIT] gt:", ground_truth)
    print("[INIT] theory0:", theory)
    print("-" * 72)

    total_steps = 5
    for step in range(1, total_steps + 1):
        print(f"\n[STEP] t={step}")
        for name in peer_names:
            fb = peer_validate(name, theory, ground_truth, step)
            if not fb:
                print(f"[PEER] {name}: no feedback")
                continue

            print(f"[PEER] {name}: {fb.comment} (w={fb.weight:.2f}, t={fb.t}, hint={repr(fb.delta_hint)})")
            theory, _score, audit = agent.revise(theory, fb)
            sim = audit["sim_res"]
            print(f"[SCORE] jaccard={sim['jaccard']:.3f} overlap={sim['overlap']}/{sim['union']} theory:")
            print("        ", theory)

        matched = theory.strip().lower() == ground_truth.strip().lower()
        if matched:
            # Exact match: leave the theory alone to preserve lock-in.
            print("[LOCK-IN] Skipping novelty injection — theory matches GT.")
        else:
            # Novelty/broadcast: warm the temperature by 0.05 (capped at 1.0).
            current = agent.last_abstraction or {}
            warmer = min(1.0, current.get("temperature", 0.2) + 0.05)
            agent.last_abstraction = {**current, "temperature": warmer}
            theory = agent.tg.generate(agent.last_abstraction)
            print(f"[NOVELTY] t={step} broadcast theory:\n          ", theory)

    print("\n" + "-" * 72)
    print("[DONE] Final theory:", theory)
    print("[HISTORY] adapt events:", len(reviser.history))
    if reviser.history:
        last = reviser.history[-1]
        print(f"         last: score={last['score']:.3f} temp={last['temperature']:.2f} nudge={repr(last['nudge'])}")

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
End-to-end MCK-style agent with:
- requires_human_review decorator that forwards *args, **kwargs
- Adaptive temperature based on score
- Lock-in: once ground truth (GT) is matched (or fully covered at high score),
  the agent snaps to canonical GT and freezes exploration (no novelty, no further changes)
"""

from __future__ import annotations
from dataclasses import dataclass
from functools import wraps
from typing import Any, Dict, List, Optional, Tuple
import random
import time

# -------------------------------
# Decorators
# -------------------------------

def requires_human_review(if_score_below: float):
    """
    Create a decorator that flags low scores on stdout before the wrapped call.

    The wrapper's signature is ``(self, kernel, reason, *args, **kwargs)``.
    A ``[HUMAN-REVIEW]`` line is printed when ``reason < if_score_below``.
    The wrapped callable is then invoked with exactly the arguments received,
    and its result is returned.
    """
    def _announce(score):
        print(f"[HUMAN-REVIEW] score={score:.3f} < threshold={if_score_below:.3f} — flagging before adapt()")

    def decorator(inner):
        @wraps(inner)
        def checked(self, kernel, reason, *args, **kwargs):
            if reason < if_score_below:
                _announce(reason)
            return inner(self, kernel, reason, *args, **kwargs)
        return checked

    return decorator

# -------------------------------
# Data structures
# -------------------------------

@dataclass
class Feedback:
    """Peer feedback record created by ``peer_validate`` and consumed by the agent."""
    # Name of the peer that produced the feedback.
    from_peer: str
    # Time step at which the feedback was issued.
    t: int
    # Human-readable remark; printed by the main loop.
    comment: str
    # Token the peer suggests incorporating; adapt() turns it into the "nudge",
    # and a non-empty hint also blocks the agent's full-coverage lock-in.
    delta_hint: Optional[str] = None
    # Relative weight assigned by the peer; printed by the main loop.
    weight: float = 1.0

# -------------------------------
# Engines and components
# -------------------------------

class SimilarityEngine:
    """
    Set-based Jaccard similarity.

    Inputs are lower-cased and split on whitespace into unique-token sets
    T and G (a repeated 'the' counts once). Reported values:
      - overlap = |T ∩ G|
      - union   = |T ∪ G|, floored at 1
      - jaccard = overlap / union
    """
    def test(self, theory: str, gt: str) -> Dict[str, Any]:
        """Return token counts, overlap, union and Jaccard ratio for the two strings."""
        theory_tokens = frozenset(theory.lower().split())
        gt_tokens = frozenset(gt.lower().split())
        overlap = sum(1 for tok in theory_tokens if tok in gt_tokens)
        # Floor the denominator at 1 so two empty inputs do not divide by zero.
        union_size = len(theory_tokens | gt_tokens) or 1
        return dict(
            tokens_theory=len(theory_tokens),
            tokens_gt=len(gt_tokens),
            jaccard=overlap / union_size,
            overlap=overlap,
            union=union_size,
        )

class ScoringEngine:
    """Reduces similarity signals to one scalar; at present just the Jaccard value."""
    def evaluate(self, theory: str, sim_res: Dict[str, Any], gt: str) -> float:
        """Return the ``"jaccard"`` entry of ``sim_res`` as a float (0.0 if missing)."""
        value = sim_res.get("jaccard", 0.0)
        return float(value)

class TheoryGenerator:
    """
    Token-level generator that interleaves a fixed 'core' with sampled vocabulary.
    To preserve the chance of exact-GT, we DO NOT deduplicate tokens; we keep
    both 'the' occurrences from core.
    """
    def __init__(self, vocabulary: Optional[List[str]] = None):
        # A missing or empty vocabulary falls back to the built-in word list.
        self.vocab = vocabulary or [
            "the", "quick", "brown", "fox", "jumps", "over", "lazy", "dog",
            "smart", "agile", "beyond", "robust", "audit", "traceable", "reliable",
            "policy", "signal", "contract", "governed", "reviewed", "clear"
        ]

    def generate(self, abstraction: Dict[str, Any]) -> str:
        """
        Generate a theory from ``abstraction``.

        Args:
            abstraction: Strategy dict. A non-empty string under ``force_gt``
                is returned verbatim. Otherwise ``temperature`` (default 0.2)
                maps to a sample size between 3 and 9, and ``nudge`` (optional
                non-empty string) is placed first among the sampled tokens.

        Returns:
            The forced string, or the space-joined core sentence with up to
            four sampled tokens interleaved (duplicates are kept).
        """
        # Snap-to-GT if requested by upstream logic
        force_gt = abstraction.get("force_gt")
        if isinstance(force_gt, str) and force_gt:
            return force_gt

        temperature = float(abstraction.get("temperature", 0.2))
        nudge = abstraction.get("nudge")

        k = min(9, max(3, int(3 + temperature * 6)))
        # random.sample raises ValueError when k exceeds the population size,
        # which happens with a small caller-supplied vocabulary.
        k = min(k, len(self.vocab))
        tokens = random.sample(self.vocab, k=k)
        if isinstance(nudge, str) and nudge:
            # Bias by ensuring the nudge is included at the front of the sampled tokens
            tokens = [nudge] + [tok for tok in tokens if tok != nudge]

        # Two 'the' occurrences are intentionally kept to allow exact GT
        core = ["the", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"]

        # Interleave: after every second core token, insert one sampled token if available
        mix: List[str] = []
        token_idx = 0
        for i, tok in enumerate(core):
            mix.append(tok)
            if i % 2 == 1 and token_idx < len(tokens):
                mix.append(tokens[token_idx])
                token_idx += 1

        return " ".join(mix)

class MemoryModelReviser:
    """
    Converts score and feedback into the agent's next generator abstraction.
    Each adaptation and each lock-in is appended to ``history`` for auditing.
    """
    def __init__(self):
        # Event dicts in chronological order; "event" is "adapt" or "lock_in".
        self.history: List[Dict[str, Any]] = []

    @requires_human_review(if_score_below=0.20)
    def adapt(self, kernel: "Agent", reason: float, feedback: Optional[Feedback] = None) -> None:
        """
        Overwrite ``kernel.last_abstraction`` based on the score ``reason``.

        Temperature is ``1 - reason`` clamped to [0.01, 1.0]; the nudge is the
        feedback's non-empty ``delta_hint``, else None. An "adapt" event is
        logged and an ``[ADAPT]`` line printed.
        """
        explore = min(1.0, 1.0 - reason)
        temperature = max(0.01, explore)

        nudge = None
        if feedback and feedback.delta_hint:
            nudge = feedback.delta_hint

        # getattr tolerates feedback=None by falling back to None.
        peer = getattr(feedback, "from_peer", None)
        step = getattr(feedback, "t", None)

        kernel.last_abstraction = dict(
            temperature=temperature,
            nudge=nudge,
            last_score=reason,
            last_feedback_peer=peer,
            t=step,
        )

        entry = dict(
            event="adapt",
            ts=time.time(),
            score=reason,
            temperature=temperature,
            nudge=nudge,
            peer=peer,
            t=step,
        )
        self.history.append(entry)

        print(f"[ADAPT] score={reason:.3f} -> temp={temperature:.2f} nudge={repr(nudge)} "
              f"peer={peer} t={step}")

    def record_lock_in(self, theory: str, gt: str) -> None:
        """
        Log a "lock_in" event. The score is 1.0 when ``theory`` equals ``gt``
        (ignoring case and surrounding whitespace) and None otherwise.
        """
        stamp = time.time()
        exact = theory.strip().lower() == gt.strip().lower()
        entry = dict(
            event="lock_in",
            ts=stamp,
            score=1.0 if exact else None,
            note="Snapped to canonical GT and froze exploration.",
        )
        self.history.append(entry)

# -------------------------------
# Agent
# -------------------------------

class Agent:
    """
    Orchestrates one revision per call:
      - similarity signals → score
      - adaptation with peer feedback
      - generation of a new theory
      - lock-in once the ground truth is reached, either by exact match or by
        full token coverage at a high score with no pending hint
    After lock-in every call returns the canonical ground truth.
    """
    def __init__(self, tg: TheoryGenerator, simi: SimilarityEngine, ste: ScoringEngine,
                 mmr: MemoryModelReviser, gt: str) -> None:
        self.tg, self.simi, self.ste, self.mmr = tg, simi, ste, mmr
        self.gt = gt
        # Written by mmr.adapt() or by lock-in; None until then.
        self.last_abstraction: Optional[Dict[str, Any]] = None
        self.locked: bool = False

    def _matches_gt(self, text: str) -> bool:
        """True when ``text`` equals the ground truth, ignoring case and outer whitespace."""
        return text.strip().lower() == self.gt.strip().lower()

    def _snap_to_gt_and_lock(self) -> None:
        """Mark the agent locked, force the generator onto GT, and log the event."""
        self.locked = True
        self.last_abstraction = dict(
            temperature=0.01,
            nudge=None,
            force_gt=self.gt,
            last_score=1.0,
        )
        self.mmr.record_lock_in(self.gt, self.gt)
        print("[LOCK-IN] Snapping to canonical GT and freezing exploration.")

    def revise(self, theory: str, feedback: Optional[Feedback]) -> Tuple[str, float, Dict[str, Any]]:
        """
        Return ``(theory_out, score, audit)``.

        ``audit["sim_res"]`` always describes the incoming ``theory``. When a
        lock-in happens during this call the returned score is 1.0.
        """
        sim_res = self.simi.test(theory, self.gt)

        # Already locked: always hand back the canonical GT, no state changes.
        if self.locked:
            if self._matches_gt(theory):
                locked_score = 1.0
            else:
                locked_score = self.ste.evaluate(theory, sim_res, self.gt)
            return self.gt, locked_score, {"sim_res": sim_res, "locked_in": True}

        score = self.ste.evaluate(theory, sim_res, self.gt)

        # Lock on an exact match, or when every unique GT token is present at
        # score >= 0.95 and the peer has no outstanding hint.
        if self._matches_gt(theory) or (
            sim_res["overlap"] == sim_res["tokens_gt"]
            and score >= 0.95
            and not (feedback and feedback.delta_hint)
        ):
            self._snap_to_gt_and_lock()
            return self.gt, 1.0, {"sim_res": sim_res, "locked_in": True}

        # Otherwise adapt and generate the next theory.
        self.mmr.adapt(self, reason=score, feedback=feedback)
        candidate = self.tg.generate(self.last_abstraction or {})

        hit_gt = self._matches_gt(candidate)
        if hit_gt:
            # Generation landed on GT by itself: lock immediately.
            self._snap_to_gt_and_lock()
            return self.gt, 1.0, {"sim_res": sim_res, "locked_in": True}

        return candidate, score, {"sim_res": sim_res, "locked_in": False}

# -------------------------------
# Peer validation
# -------------------------------

def peer_validate(peer: str, theory: str, gt: str, t: int) -> Optional[Feedback]:
    """
    Produce one peer's feedback on ``theory`` relative to ``gt``.

    With no ground-truth token missing, the feedback carries no hint and
    weight 0.8. Otherwise it hints at one missing token (first hit in a fixed
    priority list, else a random missing token) with a weight that tapers
    with ``t`` down to a floor of 0.4.
    """
    seen = set(theory.lower().split())
    missing = [tok for tok in gt.lower().split() if tok not in seen]

    if len(missing) == 0:
        return Feedback(peer, t, "Looks aligned; keep consolidating.", None, 0.8)

    priority = ("quick", "brown", "fox", "jumps", "lazy", "dog")
    hits = [tok for tok in priority if tok in missing]
    # Random fallback is consulted only when no priority token is missing.
    candidate = hits[0] if hits else random.choice(missing)

    taper = 1.2 - 0.1 * t
    weight = taper if taper > 0.4 else 0.4
    return Feedback(peer, t, f"Missing token '{candidate}'. Consider incorporating it.", candidate, weight)

# -------------------------------
# Main loop
# -------------------------------

def main():
    """
    Run the demo: five time steps of per-peer revision. After each step a
    novelty regeneration follows unless the agent has locked onto the ground
    truth. A summary of adapt and lock-in events is printed at the end.
    """
    random.seed(42)  # fixed seed keeps the printed trace reproducible

    ground_truth = "the quick brown fox jumps over the lazy dog"

    reviser = MemoryModelReviser()
    agent = Agent(
        tg=TheoryGenerator(),
        simi=SimilarityEngine(),
        ste=ScoringEngine(),
        mmr=reviser,
        gt=ground_truth,
    )

    theory = "brown fox jumps beyond policy"
    peer_names = ["alpha", "beta", "gamma"]

    print("[INIT] gt:", ground_truth)
    print("[INIT] theory0:", theory)
    print("-" * 72)

    total_steps = 5
    for step in range(1, total_steps + 1):
        print(f"\n[STEP] t={step}")
        for name in peer_names:
            fb = peer_validate(name, theory, ground_truth, step)
            if not fb:
                print(f"[PEER] {name}: no feedback")
                continue

            print(f"[PEER] {name}: {fb.comment} (w={fb.weight:.2f}, t={fb.t}, hint={repr(fb.delta_hint)})")
            theory, _score, audit = agent.revise(theory, fb)
            sim = audit["sim_res"]
            print(f"[SCORE] jaccard={sim['jaccard']:.3f} overlap={sim['overlap']}/{sim['union']} theory:")
            print("        ", theory)

        if agent.locked:
            # Locked: leave the theory alone to preserve the exact GT.
            print("[LOCK-IN] Skipping novelty injection — theory matches GT.")
        else:
            # Novelty/broadcast: warm the temperature by 0.05 (capped at 1.0).
            current = agent.last_abstraction or {}
            warmer = min(1.0, current.get("temperature", 0.2) + 0.05)
            agent.last_abstraction = {**current, "temperature": warmer}
            theory = agent.tg.generate(agent.last_abstraction)
            print(f"[NOVELTY] t={step} broadcast theory:\n          ", theory)

    print("\n" + "-" * 72)
    print("[DONE] Final theory:", theory)

    adapt_count = sum(1 for h in reviser.history if h.get("event") == "adapt")
    lock_count = sum(1 for h in reviser.history if h.get("event") == "lock_in")
    print("[HISTORY] adapt events:", adapt_count)
    print("[HISTORY] lock-in events:", lock_count)

    if not reviser.history:
        return
    last = reviser.history[-1]
    if last.get("event") != "adapt":
        print("         last event:", last.get("event"))
    else:
        print(f"         last adapt: score={last['score']:.3f} temp={last['temperature']:.2f} nudge={repr(last['nudge'])}")

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()