<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/agent_lockin_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
End-to-end agent with:
- requires_human_review decorator (forwards *args, **kwargs)
- Adaptive temperature based on score
- Lock-in: once GT is matched (or fully covered with high score), snap to GT and freeze exploration
- Optional blended peer feedback per step
- Audit history export (JSON/CSV)

Run examples:
  python agent_lockin.py
  python agent_lockin.py --blend-feedback
  python agent_lockin.py --steps 8 --seed 7 --history runs/history.json
"""

from __future__ import annotations

import argparse
import csv
import json
import os
import random
import sys
import time
from dataclasses import dataclass
from functools import wraps
from typing import Any, Dict, List, Optional, Tuple

# -------------------------------
# Configuration
# -------------------------------

@dataclass
class Config:
    """Tunable defaults for the agent loop and its command-line interface."""
    # Scores strictly below this make the decorator on MemoryModelReviser.adapt
    # print a [HUMAN-REVIEW] notice.
    human_review_thresh: float = 0.20
    # Minimum score needed (together with full GT-token coverage) for
    # Agent.revise to lock in without an exact match.
    coverage_lock_score: float = 0.95
    # Lower clamp for the adaptive temperature; also the temperature stored on lock-in.
    min_temperature: float = 0.01
    # Amount added to the temperature (capped at 1.0) by main()'s novelty step.
    novelty_increment: float = 0.05
    # Default for the --seed CLI option.
    seed: int = 42
    # Default for the --steps CLI option.
    steps: int = 5
    # Default for the --history CLI option.
    history_path: str = "runs/history.json"
    # NOTE(review): not read anywhere in the visible code; blending is driven
    # by the --blend-feedback CLI flag instead.
    blend_feedback: bool = False

# Shared default configuration; read at class-definition time by the adapt
# decorator and by parse_args() for argparse defaults.
CFG = Config()

# -------------------------------
# Decorators
# -------------------------------

def requires_human_review(if_score_below: float):
    """Build a method decorator that flags low scores for human review.

    The decorated method is called as ``(self, kernel, reason, ...)`` where
    ``reason`` is the numeric score. When ``reason`` is strictly below
    ``if_score_below`` a ``[HUMAN-REVIEW]`` notice is printed first; in every
    case the call is forwarded with all positional and keyword arguments and
    the method's own return value is handed back.
    """
    def decorator(method):
        @wraps(method)
        def guarded(self, kernel, reason, *args, **kwargs):
            needs_review = reason < if_score_below
            if needs_review:
                print(f"[HUMAN-REVIEW] score={reason:.3f} < threshold={if_score_below:.3f} — flagging before adapt()")
            result = method(self, kernel, reason, *args, **kwargs)
            return result

        return guarded

    return decorator

# -------------------------------
# Data structures
# -------------------------------

@dataclass
class Feedback:
    """One piece of peer feedback about the current theory."""
    # Name of the peer that produced the feedback ("blend" for blended feedback).
    from_peer: str
    # Time step the feedback refers to.
    t: int
    # Human-readable message shown in the [PEER] log line.
    comment: str
    # Token the peer suggests adding; MemoryModelReviser.adapt uses it as the
    # generator nudge. None when there is no suggestion.
    delta_hint: Optional[str] = None
    # Vote weight used by blend_feedback when choosing among hints.
    weight: float = 1.0

# -------------------------------
# Engines and components
# -------------------------------

class SimilarityEngine:
    """
    Jaccard similarity over unique, lower-cased, whitespace-separated tokens:
      - each text becomes a set of tokens (repeated words count once)
      - overlap = size of the intersection
      - union   = size of the union, floored at 1 so the ratio is always defined
      - jaccard = overlap / union
    """
    def test(self, theory: str, gt: str) -> Dict[str, Any]:
        """Compare ``theory`` with ``gt`` and return the token counts and ratio."""
        theory_tokens = set(theory.lower().split())
        gt_tokens = set(gt.lower().split())

        shared = len(theory_tokens.intersection(gt_tokens))
        # Two empty texts would give a zero-sized union; use 1 instead.
        combined = len(theory_tokens.union(gt_tokens)) or 1

        result: Dict[str, Any] = {}
        result["tokens_theory"] = len(theory_tokens)
        result["tokens_gt"] = len(gt_tokens)
        result["jaccard"] = shared / combined
        result["overlap"] = shared
        result["union"] = combined
        return result

class ScoringEngine:
    """Turns a similarity result into a single float score."""
    def evaluate(self, theory: str, sim_res: Dict[str, Any], gt: str) -> float:
        """Return the ``"jaccard"`` entry of ``sim_res`` as a float (0.0 if absent).

        ``theory`` and ``gt`` are accepted for interface symmetry but not used.
        """
        raw_score = sim_res.get("jaccard", 0.0)
        score = float(raw_score)
        return score

class TheoryGenerator:
    """
    Interleaves a fixed 'core' phrase with sampled vocabulary.
    Keeps duplicated 'the' in core to enable exact GT reconstruction.
    """
    def __init__(self, vocabulary: Optional[List[str]] = None):
        """Store the sampling vocabulary.

        Args:
            vocabulary: Words to sample from. ``None`` or an empty list falls
                back to the built-in word list.
        """
        self.vocab = vocabulary or [
            "the", "quick", "brown", "fox", "jumps", "over", "lazy", "dog",
            "smart", "agile", "beyond", "robust", "audit", "traceable", "reliable",
            "policy", "signal", "contract", "governed", "reviewed", "clear"
        ]

    def generate(self, abstraction: Dict[str, Any]) -> str:
        """Produce the next theory string from an abstraction dict.

        Args:
            abstraction: Recognised keys are ``"force_gt"`` (a non-empty string
                is returned verbatim), ``"temperature"`` (default 0.2; higher
                values sample more words) and ``"nudge"`` (a word to place
                first among the inserted words).

        Returns:
            The core phrase with one sampled word inserted after every second
            core word, for as long as sampled words remain.
        """
        # Snap-to-GT if requested by upstream lock-in
        force_gt = abstraction.get("force_gt")
        if isinstance(force_gt, str) and force_gt:
            return force_gt

        temperature = float(abstraction.get("temperature", 0.2))
        nudge = abstraction.get("nudge")

        # Sample k tokens based on temperature (between 3 and 9).
        k = min(9, max(3, int(3 + temperature * 6)))
        # random.sample raises ValueError when k exceeds the population size,
        # so never request more words than a small custom vocabulary holds.
        k = min(k, len(self.vocab))
        tokens = random.sample(self.vocab, k=k)
        if isinstance(nudge, str) and nudge:
            # Ensure the nudge appears early in the sequence
            tokens = [nudge] + [tok for tok in tokens if tok != nudge]

        # Core phrase (duplicate 'the' intentionally preserved)
        core = ["the", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog"]

        # Interleave: after every second core token, insert one sampled token if available
        mix: List[str] = []
        token_idx = 0
        for i, tok in enumerate(core):
            mix.append(tok)
            if i % 2 == 1 and token_idx < len(tokens):
                mix.append(tokens[token_idx])
                token_idx += 1

        return " ".join(mix)

class MemoryModelReviser:
    """
    Converts a score (plus optional peer feedback) into the abstraction the
    generator consumes next, and keeps an audit trail of every adaptation
    and lock-in in ``self.history``.
    """
    def __init__(self):
        self.history: List[Dict[str, Any]] = []

    @requires_human_review(if_score_below=CFG.human_review_thresh)
    def adapt(self, kernel: "Agent", reason: float, feedback: Optional[Feedback] = None) -> None:
        """Update ``kernel.last_abstraction`` from the score and log the change.

        ``reason`` is the score: temperature is ``1 - reason`` clamped to
        ``[CFG.min_temperature, 1.0]``. The feedback's ``delta_hint``, when
        present, becomes the generator nudge.
        """
        capped = min(1.0, 1.0 - reason)
        temperature = max(CFG.min_temperature, capped)

        nudge = None
        if feedback and feedback.delta_hint:
            nudge = feedback.delta_hint

        # Both are None when no feedback object was supplied.
        peer = getattr(feedback, "from_peer", None)
        step = getattr(feedback, "t", None)

        kernel.last_abstraction = dict(
            temperature=temperature,
            nudge=nudge,
            last_score=reason,
            last_feedback_peer=peer,
            t=step,
        )

        entry: Dict[str, Any] = {
            "event": "adapt",
            "ts": time.time(),
            "score": reason,
            "temperature": temperature,
            "nudge": nudge,
            "peer": peer,
            "t": step,
        }
        self.history.append(entry)

        print(f"[ADAPT] score={reason:.3f} -> temp={temperature:.2f} nudge={repr(nudge)} "
              f"peer={peer} t={step}")

    def record_lock_in(self, theory: str, gt: str) -> None:
        """Append a ``lock_in`` event; score is 1.0 only if theory equals gt
        after stripping and lower-casing, otherwise None."""
        matches = theory.strip().lower() == gt.strip().lower()
        entry: Dict[str, Any] = {
            "event": "lock_in",
            "ts": time.time(),
            "score": 1.0 if matches else None,
            "note": "Snapped to canonical GT and froze exploration."
        }
        self.history.append(entry)

# -------------------------------
# Agent
# -------------------------------

class Agent:
    """
    Runs one revision cycle per call to ``revise``:
      - compare the incoming theory with the ground truth and score it
      - lock in on an exact match, or on full GT-token coverage with a high score
      - otherwise adapt the abstraction from score/feedback and generate a new theory
      - once locked, always hand back the canonical ground truth
    """
    def __init__(self, tg: TheoryGenerator, simi: SimilarityEngine, ste: ScoringEngine,
                 mmr: MemoryModelReviser, gt: str) -> None:
        self.tg, self.simi, self.ste, self.mmr = tg, simi, ste, mmr
        self.gt = gt
        self.last_abstraction: Optional[Dict[str, Any]] = None
        self.locked: bool = False

    def _snap_to_gt_and_lock(self) -> None:
        """Mark the agent locked, force the generator to emit GT, and log it."""
        frozen: Dict[str, Any] = {
            "temperature": CFG.min_temperature,
            "nudge": None,
            "force_gt": self.gt,
            "last_score": 1.0,
        }
        self.locked = True
        self.last_abstraction = frozen
        self.mmr.record_lock_in(self.gt, self.gt)
        print("[LOCK-IN] Snapping to canonical GT and freezing exploration.")

    def revise(self, theory: str, feedback: Optional[Feedback]) -> Tuple[str, float, Dict[str, Any]]:
        """Score ``theory`` and return ``(next_theory, score, audit)``.

        ``audit`` holds the similarity result for the *incoming* theory under
        ``"sim_res"`` and a ``"locked_in"`` flag.
        """
        sim_res = self.simi.test(theory, self.gt)
        is_exact = theory.strip().lower() == self.gt.strip().lower()

        # Already locked: keep returning the canonical GT.
        if self.locked:
            if is_exact:
                score = 1.0
            else:
                score = self.ste.evaluate(theory, sim_res, self.gt)
            return self.gt, score, {"sim_res": sim_res, "locked_in": True}

        score = self.ste.evaluate(theory, sim_res, self.gt)

        def lock_and_report() -> Tuple[str, float, Dict[str, Any]]:
            # Shared tail of every lock-in path.
            self._snap_to_gt_and_lock()
            return self.gt, 1.0, {"sim_res": sim_res, "locked_in": True}

        # Exact match locks immediately.
        if is_exact:
            return lock_and_report()

        # Every unique GT token present, score high enough, and no pending hint.
        covers_gt = sim_res["overlap"] == sim_res["tokens_gt"]
        if covers_gt and score >= CFG.coverage_lock_score and not (feedback and feedback.delta_hint):
            return lock_and_report()

        # No lock yet: adapt, then ask the generator for a fresh theory.
        self.mmr.adapt(self, reason=score, feedback=feedback)
        candidate = self.tg.generate(self.last_abstraction or {})

        # The generator may land on GT by itself; lock in that case too.
        if candidate.strip().lower() == self.gt.strip().lower():
            return lock_and_report()

        return candidate, score, {"sim_res": sim_res, "locked_in": False}

# -------------------------------
# Peer validation and blending
# -------------------------------

def peer_validate(peer: str, theory: str, gt: str, t: int) -> Optional[Feedback]:
    """Build one peer's feedback on ``theory`` relative to ``gt``.

    If no GT word is missing from the theory, the feedback carries no hint and
    weight 0.8. Otherwise the hint is the first missing word from a fixed
    priority list, or a random missing word when none of those is missing;
    the weight is ``1.2 - 0.1 * t`` with a floor of 0.4.
    """
    present = set(theory.lower().split())
    missing = [word for word in gt.lower().split() if word not in present]

    if not missing:
        return Feedback(
            from_peer=peer,
            t=t,
            comment="Looks aligned; keep consolidating.",
            delta_hint=None,
            weight=0.8,
        )

    preferred = ("quick", "brown", "fox", "jumps", "lazy", "dog")
    hint = next((word for word in preferred if word in missing), None)
    if hint is None:
        # Only touch the RNG when no preferred word is missing.
        hint = random.choice(missing)

    return Feedback(
        from_peer=peer,
        t=t,
        comment=f"Missing token '{hint}'. Consider incorporating it.",
        delta_hint=hint,
        weight=max(0.4, 1.2 - 0.1 * t),
    )

def blend_feedback(feedbacks: List[Feedback]) -> Optional[Feedback]:
    """Merge several feedback items into a single ``"blend"`` feedback.

    Returns None for an empty list. The blended hint is the hint with the
    largest summed weight (the earliest-seen hint wins a tie), or None if no
    item has a hint. The weight is the mean over all items and ``t`` is the
    latest step among them.
    """
    if not feedbacks:
        return None

    # Accumulate vote weight per hint, in first-seen order.
    votes: Dict[str, float] = {}
    for item in feedbacks:
        hint = item.delta_hint
        if hint:
            votes[hint] = votes.get(hint, 0.0) + item.weight

    winner = None
    if votes:
        winner = max(votes, key=lambda hint: votes[hint])

    mean_weight = sum(item.weight for item in feedbacks) / len(feedbacks)
    latest_step = max(item.t for item in feedbacks)

    return Feedback(
        from_peer="blend",
        t=latest_step,
        comment=f"Blended hint={winner}",
        delta_hint=winner,
        weight=mean_weight
    )

# -------------------------------
# History export
# -------------------------------

def export_history(mmr: "MemoryModelReviser", path: str) -> None:
    """Write ``mmr.history`` to ``path`` as JSON or CSV, chosen by extension.

    The parent directory is created if needed. A ``.csv`` export of an empty
    history produces an empty file; any other extension is reported and
    skipped. Export is best-effort: any exception is printed, never raised.
    """
    try:
        parent = os.path.dirname(path) or "."
        os.makedirs(parent, exist_ok=True)
        suffix = os.path.splitext(path)[1].lower()

        if suffix == ".json":
            with open(path, "w", encoding="utf-8") as out:
                json.dump(mmr.history, out, ensure_ascii=False, indent=2)
            print(f"[EXPORT] Wrote history JSON -> {path}")
            return

        if suffix != ".csv":
            print(f"[EXPORT] Unsupported extension for {path}; skipping.")
            return

        records = mmr.history
        if not records:
            # Touch the file so callers still find something at the path.
            with open(path, "w", newline="", encoding="utf-8"):
                pass
            print(f"[EXPORT] No history; created empty CSV -> {path}")
            return

        # Header is the sorted union of every key seen in any record.
        columns = sorted(set().union(*(record.keys() for record in records)))
        with open(path, "w", newline="", encoding="utf-8") as out:
            writer = csv.DictWriter(out, fieldnames=columns)
            writer.writeheader()
            writer.writerows(records)
        print(f"[EXPORT] Wrote history CSV -> {path}")
    except Exception as e:
        print(f"[EXPORT] Failed to write history: {e}")

# -------------------------------
# Main
# -------------------------------

def parse_args(argv: Optional[List[str]] = None) -> argparse.Namespace:
    """Parse command-line options; ``argv=None`` lets argparse read ``sys.argv``.

    Defaults for ``--steps``, ``--seed`` and ``--history`` come from ``CFG``.
    """
    parser = argparse.ArgumentParser(
        description="Agent with lock-in, human-review decorator, and audit export.",
    )
    parser.add_argument(
        "--steps", type=int, default=CFG.steps,
        help="Number of time steps",
    )
    parser.add_argument(
        "--seed", type=int, default=CFG.seed,
        help="Random seed",
    )
    parser.add_argument(
        "--blend-feedback", action="store_true",
        help="Blend peer feedback per step instead of sequential revises",
    )
    parser.add_argument(
        "--history", type=str, default=CFG.history_path,
        help="Path to write history (.json or .csv)",
    )
    namespace = parser.parse_args(argv)
    return namespace

def main(argv: Optional[List[str]] = None):
    """Run the demo loop: seed the RNG, revise a starting theory against peer
    feedback for the requested number of steps, print a summary, and export
    the audit history."""
    cli = parse_args(argv)
    random.seed(cli.seed)

    # CLI values are read into locals; CFG itself is left untouched.
    total_steps = int(cli.steps)
    out_path = cli.history
    blended = bool(cli.blend_feedback)

    X_true = "the quick brown fox jumps over the lazy dog"

    mmr = MemoryModelReviser()
    agent = Agent(
        tg=TheoryGenerator(),
        simi=SimilarityEngine(),
        ste=ScoringEngine(),
        mmr=mmr,
        gt=X_true,
    )

    theory = "brown fox jumps beyond policy"
    peers = ["alpha", "beta", "gamma"]

    def show_peer(label, fb):
        # One log line per piece of peer feedback.
        print(f"[PEER] {label}: {fb.comment} (w={fb.weight:.2f}, t={fb.t}, hint={repr(fb.delta_hint)})")

    def revise_and_show(current, fb):
        # Revise once, print the similarity of the incoming theory, and return the new one.
        revised, _score, audit = agent.revise(current, fb)
        sim = audit["sim_res"]
        print(f"[SCORE] jaccard={sim['jaccard']:.3f} overlap={sim['overlap']}/{sim['union']} theory:")
        print("        ", revised)
        return revised

    print("[INIT] gt:", X_true)
    print("[INIT] theory0:", theory)
    print("-" * 72)

    for t in range(1, total_steps + 1):
        print(f"\n[STEP] t={t}")

        if blended:
            # Collect every peer's feedback first, then blend and revise once.
            collected = [peer_validate(name, theory, X_true, t) for name in peers]
            usable = [fb for fb in collected if fb]
            for fb in usable:
                show_peer(fb.from_peer, fb)
            theory = revise_and_show(theory, blend_feedback(usable))
        else:
            # Revise right after each individual peer's feedback.
            for name in peers:
                fb = peer_validate(name, theory, X_true, t)
                if fb:
                    show_peer(name, fb)
                    theory = revise_and_show(theory, fb)
                else:
                    print(f"[PEER] {name}: no feedback")

        # Once locked, leave the theory alone so it stays equal to GT.
        if agent.locked:
            print("[LOCK-IN] Skipping novelty injection — theory matches GT.")
            continue

        # Novelty step between rounds: raise the temperature a little and regenerate.
        previous = agent.last_abstraction or {}
        warmer = min(1.0, previous.get("temperature", 0.2) + CFG.novelty_increment)
        agent.last_abstraction = {**previous, "temperature": warmer}
        theory = agent.tg.generate(agent.last_abstraction or {})
        print(f"[NOVELTY] t={t} broadcast theory:\n          ", theory)

    print("\n" + "-" * 72)
    print("[DONE] Final theory:", theory)

    event_names = [entry.get("event") for entry in mmr.history]
    print("[HISTORY] adapt events:", event_names.count("adapt"))
    print("[HISTORY] lock-in events:", event_names.count("lock_in"))
    if mmr.history:
        last = mmr.history[-1]
        if last.get("event") != "adapt":
            print("         last event:", last.get("event"))
        else:
            print(f"         last adapt: score={last['score']:.3f} temp={last['temperature']:.2f} nudge={repr(last['nudge'])}")

    # Export audit history
    export_history(mmr, out_path)

# Script entry point. main() has no return statement, so sys.exit() receives
# None and the process exits with status 0 after a normal run.
if __name__ == "__main__":
    sys.exit(main())