<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/unifiedai_single_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# unifiedai_single.py
# One-file, ready-to-run: Entangled Episodic Memory + LineSeek env + Q-learning policy
# + Reflection + Abstraction/Theory/Simulator/Meta-Validator + Curriculum tweaks.

from __future__ import annotations
import os, math, random, argparse
from dataclasses import dataclass, asdict
from typing import Optional, Tuple, Dict, Any, List

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


# ============================================================
# Utilities & Config
# ============================================================

def set_seed(seed: int):
    """Seed every random number generator this script draws from.

    Applies ``seed`` to Python's ``random`` module, NumPy's global generator,
    and PyTorch's CPU and CUDA generators, in that order.
    """
    seeders = (
        random.seed,
        np.random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)

def device_or_cpu(name: str) -> torch.device:
    """Turn a device name into a ``torch.device``.

    A request for ``"cuda"`` falls back to the CPU when CUDA is not available;
    every other name is passed to ``torch.device`` unchanged.
    """
    cuda_missing = name == "cuda" and not torch.cuda.is_available()
    return torch.device("cpu" if cuda_missing else name)

@dataclass
class Config:
    """All tunable settings for one run of the demo, grouped by subsystem."""

    # --- System ---
    seed: int = 7                       # base seed for the RNGs
    device: str = "cpu"                 # device name handed to device_or_cpu
    episodes: int = 10                  # number of episodes to run
    steps_per_episode: int = 50         # step limit per episode
    gamma: float = 0.95                 # discount factor for the Q-learning target
    verbose: bool = True                # print the per-episode planning summary

    # --- Policy / exploration ---
    epsilon_start: float = 0.25         # initial epsilon-greedy rate
    epsilon_min: float = 0.02           # floor applied when epsilon decays
    epsilon_decay: float = 0.98         # multiplicative decay factor
    lr: float = 0.001                   # optimizer learning rate
    hidden_dim: int = 64                # hidden width of the Q-network

    # --- Memory (EEM) ---
    mem_slots: int = 256                # number of memory slots
    key_dim: int = 32                   # key dimensionality
    value_dim: int = 16                 # value dimensionality
    mem_temperature: float = 0.2        # retrieval softmax temperature
    mem_ema: float = 0.2                # blend factor for "nearest" writes
    mem_householder_layers: int = 1     # number of Householder reflections

    # --- Environment ---
    line_length: int = 31               # number of cells on the line (must be odd)
    obs_noise_std: float = 0.1          # std of the noise added to the distance reading
    success_reward: float = 10.0        # bonus for reaching the target
    step_penalty: float = -0.05         # base reward added on every step

    # --- Reflection / curriculum ---
    fail_recovery_eps_boost: float = 0.15   # epsilon increase after a failed episode


# ============================================================
# Entangled Episodic Memory (complex Hilbert space)
# ============================================================

def _as_complex(r: torch.Tensor, i: Optional[torch.Tensor] = None) -> torch.Tensor:
    if i is None: i = torch.zeros_like(r)
    return torch.complex(r, i)

def _norm_complex(z: torch.Tensor, eps: float = 1e-6, dim: int = -1) -> torch.Tensor:
    mag = torch.sqrt((z.real**2 + z.imag**2).sum(dim=dim, keepdim=True) + eps)
    return z / mag

def _phase_unitary(z: torch.Tensor, theta: torch.Tensor) -> torch.Tensor:
    # Element-wise complex phase: e^{iθ}
    phase = torch.complex(torch.cos(theta), torch.sin(theta))
    return z * phase

def _stack_householder(U: torch.Tensor, v: torch.Tensor) -> torch.Tensor:
    v = v / (v.norm() + 1e-6)
    proj = torch.matmul(U, v)
    return U - 2.0 * proj.unsqueeze(-1) * v

class EntangledEpisodicMemory(nn.Module):
    """
    Slot memory with complex-valued keys and real-valued values.

    Queries and written keys go through the same learned transform U: an
    element-wise phase rotation, optionally followed by Householder
    reflections on the stacked [real, imag] vector.
    Retrieval weights are softmax(|<U(q), k>|^2 / τ) over the slots.
    """
    def __init__(
        self,
        slots: int = 256,
        key_dim: int = 32,
        value_dim: int = 16,
        temperature: float = 0.2,
        ema: float = 0.2,
        trainable_memory: bool = False,
        householder_layers: int = 1,
        dtype: torch.dtype = torch.float32,
        device: Optional[torch.device] = None,
    ):
        super().__init__()
        self.slots = slots
        self.key_dim = key_dim
        self.value_dim = value_dim
        self.temperature = temperature
        self.ema = ema
        self.householder_layers = householder_layers

        factory = {"dtype": dtype, "device": device}

        # Random draws happen in this order: real key parts, imaginary key
        # parts, then (below) the Householder vectors.
        initial_state = (
            ("keys_r", F.normalize(torch.randn(slots, key_dim, **factory), dim=-1)),
            ("keys_i", F.normalize(torch.randn(slots, key_dim, **factory), dim=-1)),
            ("values", torch.zeros(slots, value_dim, **factory)),
        )
        # The memory is either learned by gradient descent (parameters) or
        # only changed through write() (buffers).
        for name, tensor in initial_state:
            if trainable_memory:
                setattr(self, name, nn.Parameter(tensor))
            else:
                self.register_buffer(name, tensor)

        # One phase angle per key component; zeros mean "no rotation" at init.
        self.theta = nn.Parameter(torch.zeros(key_dim, **factory))

        if self.householder_layers > 0:
            reflectors = []
            for _ in range(self.householder_layers):
                # Each reflector acts on the 2*key_dim stacked [real, imag] vector.
                raw = torch.randn(2 * key_dim, **factory)
                reflectors.append(nn.Parameter(F.normalize(raw, dim=0)))
            self.house_v = nn.ParameterList(reflectors)

        self.register_buffer("age", torch.zeros(slots, dtype=torch.long, device=device))
        self.register_buffer("ptr", torch.zeros((), dtype=torch.long, device=device))

    def _apply_unitary(self, z_c: torch.Tensor) -> torch.Tensor:
        """Apply the phase rotation and then any Householder reflections to ``z_c``."""
        rotated = _phase_unitary(z_c, self.theta)
        if self.householder_layers <= 0:
            return rotated
        width = rotated.size(-1)
        stacked = torch.cat([rotated.real, rotated.imag], dim=-1)
        for reflector in self.house_v:
            stacked = _stack_householder(stacked, reflector)
        # Split the stacked vector back into real and imaginary halves.
        return torch.complex(stacked[..., :width], stacked[..., width:])

    def _similarity(self, q_c: torch.Tensor, k_c: torch.Tensor) -> torch.Tensor:
        """Return |<q, k>|^2 for every (query row, key row) pair after normalising both."""
        q_unit = _norm_complex(q_c)
        k_unit = _norm_complex(k_c)
        k_conj_t = torch.conj(k_unit).unsqueeze(0).transpose(-1, -2)
        overlap = q_unit.unsqueeze(1) @ k_conj_t
        return (torch.abs(overlap) ** 2).squeeze(1)

    @torch.no_grad()
    def write(self, k: torch.Tensor, v: torch.Tensor, strategy: str = "nearest"):
        """Store key/value pairs in the memory (no gradients are recorded).

        ``k`` and ``v`` may be single vectors or batches with one row per pair.
        ``"ring"`` overwrites the slot at ``ptr % slots`` and advances ``ptr``.
        ``"nearest"`` blends each pair into its most similar slot with weight
        ``ema``, re-normalising the real and imaginary key parts separately.
        Raises ``ValueError`` for any other strategy.
        """
        if k.dim() == 1:
            k, v = k.unsqueeze(0), v.unsqueeze(0)
        k_c = k if torch.is_complex(k) else _as_complex(k)
        k_c = _norm_complex(self._apply_unitary(k_c))

        if strategy == "ring":
            for i in range(k_c.size(0)):
                slot = int(self.ptr.item() % self.slots)
                self.keys_r[slot] = k_c[i].real
                self.keys_i[slot] = k_c[i].imag
                self.values[slot] = v[i]
                self.age[slot] = 0
                self.ptr += 1
        elif strategy == "nearest":
            stored = _as_complex(self.keys_r, self.keys_i)
            best_slots = self._similarity(k_c, stored).argmax(dim=-1).tolist()
            for i, slot in enumerate(best_slots):
                blended_r = (1 - self.ema) * self.keys_r[slot] + self.ema * k_c[i].real
                blended_i = (1 - self.ema) * self.keys_i[slot] + self.ema * k_c[i].imag
                self.keys_r[slot] = F.normalize(blended_r, dim=-1)
                self.keys_i[slot] = F.normalize(blended_i, dim=-1)
                self.values[slot] = (1 - self.ema) * self.values[slot] + self.ema * v[i]
                self.age[slot] = 0
        else:
            raise ValueError("Unknown write strategy")
        # Every slot ages by one tick per write call, including those just reset.
        self.age += 1

    def read(self, q: torch.Tensor, topk: int = 0, return_weights: bool = False):
        """Retrieve a weighted mix of stored values for ``q``.

        ``q`` may be a single vector or a batch. When ``0 < topk < slots``
        only the ``topk`` best-scoring slots take part and the weights have
        ``topk`` entries per query; otherwise all slots are used.
        Returns ``(out, weights)`` when ``return_weights`` is true, else
        ``(out, None)``.
        """
        single = q.dim() == 1
        q_batch = q.unsqueeze(0) if single else q
        q_c = q_batch if torch.is_complex(q_batch) else _as_complex(q_batch)
        q_c = self._apply_unitary(q_c)
        stored = _as_complex(self.keys_r, self.keys_i)
        # Guard the temperature so a zero or negative value cannot divide by zero.
        scores = self._similarity(q_c, stored) / max(self.temperature, 1e-6)

        if topk and topk < self.slots:
            top_scores, top_slots = scores.topk(topk, dim=-1)
            weights = torch.softmax(top_scores, dim=-1)
            out = (weights.unsqueeze(-1) * self.values[top_slots]).sum(dim=1)
        else:
            weights = torch.softmax(scores, dim=-1)
            out = weights @ self.values

        if single:
            out = out.squeeze(0)
            if return_weights:
                weights = weights.squeeze(0)
        if not return_weights:
            return out, None
        return out, weights


# ============================================================
# Environment: 1D LineSeek
# ============================================================

@dataclass
class LineSeekConfig:
    """Settings consumed by the LineSeek environment."""

    line_length: int          # number of cells on the line (must be odd)
    obs_noise_std: float      # std of the noise added to the distance reading
    success_reward: float     # bonus added when the target is reached
    step_penalty: float       # base reward added on every step
    steps_per_episode: int    # step count at which an episode is cut off

class LineSeekEnv:
    """1-D navigation task: walk along a line until the hidden target is reached.

    Positions are integers in ``[-half, half]`` with ``half = line_length // 2``.
    The observation is ``[pos / scale, (target - pos) / scale + noise]`` and
    the per-step reward is ``step_penalty - |target - pos| / scale``, plus
    ``success_reward`` on the step that lands on the target.
    ``scale`` is ``half`` clamped to at least 1.
    """

    def __init__(self, cfg: LineSeekConfig, device: torch.device):
        assert cfg.line_length >= 1, "line_length must be positive"
        assert cfg.line_length % 2 == 1, "line_length must be odd"
        self.cfg = cfg; self.device = device
        self.half = cfg.line_length // 2
        # Single normaliser for every position/distance scaling. Clamping to 1
        # keeps a one-cell line (half == 0) from dividing by zero.
        self._scale = max(1, self.half)
        self.pos = 0; self.target = 0; self.t = 0

    def reset(self, seed: int | None = None):
        """Start a new episode at position 0 with a random target.

        When ``seed`` is given it reseeds the *global* ``random`` and torch
        generators before the target is drawn. Returns the first observation.
        """
        if seed is not None:
            random.seed(seed); torch.manual_seed(seed)
        self.pos = 0
        self.target = random.randint(-self.half, self.half)
        self.t = 0
        return self._obs()

    def _obs(self):
        """Build the 2-element float32 observation: scaled position, noisy scaled distance."""
        dist = (self.target - self.pos) / self._scale
        noise = torch.randn(1, device=self.device).item() * self.cfg.obs_noise_std
        return torch.tensor([self.pos / self._scale, dist + noise], device=self.device, dtype=torch.float32)

    def step(self, action: int):
        """Move by ``action`` cells (clamped to the line) and score the move.

        Returns ``(obs, reward, done, info)``. ``done`` is true when the
        target is reached or the step limit is hit; ``info`` carries
        ``success``, the signed ``dist`` to the target, and the step count ``t``.
        """
        self.t += 1
        self.pos = max(-self.half, min(self.half, self.pos + int(action)))
        dist = self.target - self.pos
        reached = self.pos == self.target
        done = reached or (self.t >= self.cfg.steps_per_episode)
        reward = self.cfg.step_penalty - abs(dist) / self._scale
        if reached:
            reward += self.cfg.success_reward
        obs = self._obs()
        info = {"success": reached, "dist": dist, "t": self.t}
        return obs, reward, done, info


# ============================================================
# Policy: Q-learning
# ============================================================

class QNetwork(nn.Module):
    """Two-hidden-layer ReLU MLP mapping an observation to one Q-value per action."""

    def __init__(self, obs_dim: int, hidden: int, actions: int):
        super().__init__()
        layers = [
            nn.Linear(obs_dim, hidden),
            nn.ReLU(),
            nn.Linear(hidden, hidden),
            nn.ReLU(),
            nn.Linear(hidden, actions),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the Q-values for ``x``; the last dimension has one entry per action."""
        q_values = self.net(x)
        return q_values

@dataclass
class PolicyConfig:
    """Hyper-parameters for the epsilon-greedy Q-learning policy."""

    obs_dim: int            # observation size fed to the Q-network
    hidden_dim: int         # hidden width of the Q-network
    actions: int            # number of discrete actions (Q-network outputs)
    lr: float               # optimizer learning rate
    gamma: float            # discount factor in the TD target
    epsilon: float          # initial exploration rate
    epsilon_min: float      # floor applied when epsilon decays
    epsilon_decay: float    # multiplicative decay factor

class AgentPolicy:
    """Epsilon-greedy Q-learning agent trained online, one transition at a time.

    Actions are encoded as ``index - 1``, so with three Q-network outputs the
    environment sees -1, 0 and +1.
    """

    def __init__(self, cfg: PolicyConfig, device: torch.device):
        self.cfg = cfg; self.device = device
        self.q = QNetwork(cfg.obs_dim, cfg.hidden_dim, cfg.actions).to(device)
        self.opt = torch.optim.Adam(self.q.parameters(), lr=cfg.lr)
        self.epsilon = cfg.epsilon

    def select_action(self, obs: torch.Tensor) -> int:
        """Pick an action: random with probability ``epsilon``, else the greedy one."""
        if torch.rand(()) < self.epsilon:
            # Sample over the same index range the greedy branch can return,
            # rather than a hard-coded 3.
            return int(torch.randint(0, self.cfg.actions, (1,)).item() - 1)
        with torch.no_grad():
            q = self.q(obs.unsqueeze(0))
            a_idx = int(q.argmax(dim=-1).item())
            return a_idx - 1

    def update(self, s: torch.Tensor, a: int, r: float, s2: torch.Tensor, done: bool):
        """Take one gradient step on the TD error for ``(s, a, r, s2, done)``.

        The target is ``r`` when ``done`` and ``r + gamma * max_a' Q(s2, a')``
        otherwise, computed without gradients. Returns the Huber loss as a float.
        """
        a_idx = a + 1  # undo the "index - 1" action encoding
        q = self.q(s.unsqueeze(0))
        q_sa = q[0, a_idx]
        with torch.no_grad():
            # Match the prediction's dtype so an integer reward cannot yield
            # an int64 target.
            target = torch.tensor(float(r), dtype=q_sa.dtype, device=self.device)
            if not done:
                q2 = self.q(s2.unsqueeze(0)).max(dim=-1).values[0]
                target = target + self.cfg.gamma * q2
        loss = F.smooth_l1_loss(q_sa, target)
        self.opt.zero_grad(); loss.backward(); self.opt.step()
        return float(loss.item())

    def decay_epsilon(self):
        """Multiply epsilon by ``epsilon_decay``, never going below ``epsilon_min``."""
        self.epsilon = max(self.cfg.epsilon_min, self.epsilon * self.cfg.epsilon_decay)

    def boost_epsilon(self, amount: float):
        """Raise epsilon by ``amount``, capped at 0.9."""
        self.epsilon = min(0.9, self.epsilon + amount)


# ============================================================
# Meta loop utilities (abstraction/theory/simulator/validator)
# ============================================================

def abstract_state(obs: torch.Tensor) -> torch.Tensor:
    """Map a raw observation to its abstract form (currently the identity)."""
    abstracted = obs
    return abstracted

def identify_generalizations(episode: Dict[str, Any]) -> Dict[str, Any]:
    """Summarise an episode by how its distance to the target changed.

    Reads the signed per-step distances from ``episode["dists"]`` and returns
    ``{"distance_trend": ..., "steps": ...}``. The trend is the change in
    *absolute* distance from the first to the last step, so a negative value
    means the agent ended closer to the target whichever side it was on.
    With fewer than two recorded steps the trend is 0.0.
    """
    dists = episode.get("dists", [])
    if len(dists) >= 2:
        # Compare magnitudes: distances are signed, so a raw difference would
        # report progress from the negative side as a worsening trend.
        trend = float(abs(dists[-1]) - abs(dists[0]))
    else:
        trend = 0.0
    return {"distance_trend": trend, "steps": len(dists)}

def construct_theory(abstractions: Dict[str, Any]) -> Dict[str, Any]:
    """Decide whether more exploration is warranted.

    Exploration is recommended when ``distance_trend`` (default 0.0) is zero
    or positive.
    """
    no_progress = abstractions.get("distance_trend", 0.0) >= 0.0
    return {"should_explore_more": no_progress}

def generate_simulators(theory: Dict[str, Any]) -> Dict[str, Any]:
    """List the noise levels to simulate for a given theory.

    The base candidates are 0.05 and 0.1; 0.2 is added when the theory asks
    for more exploration.
    """
    candidates = [0.05, 0.1]
    if theory.get("should_explore_more", False):
        candidates.append(0.2)
    return {"noise_candidates": candidates}

def meta_validate(results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Score a batch of simulation results and pick the best one.

    The best result has the highest ``success_rate``, with ties broken by the
    fewest ``avg_steps``. An empty batch yields fixed scores and ``best=None``.
    """
    if not results:
        return {"usefulness": 0.0, "parsimony": 1.0, "novelty": 0.0, "best": None}

    def _rank(result: Dict[str, Any]):
        # avg_steps is negated so that fewer steps ranks higher.
        return (result.get("success_rate", 0.0), -result.get("avg_steps", 1e9))

    winner = max(results, key=_rank)
    return {"usefulness": 1.0, "parsimony": 0.8, "novelty": 0.2, "best": winner}

def planning_cycle(mem: EntangledEpisodicMemory, episode_log: Dict[str, Any]) -> Dict[str, Any]:
    """Run one abstraction → theory → simulation → validation pass.

    ``mem`` is accepted but not used by the current implementation. The
    simulation results are synthetic: they are computed directly from each
    candidate noise level. Returns the abstractions, the theory and the
    meta-validation outcome.
    """
    def _simulate(noise: float) -> Dict[str, Any]:
        # Stand-in simulator: higher noise means lower success and more steps.
        return {"noise": noise, "success_rate": 1.0 - noise, "avg_steps": 10 + int(10 * noise)}

    abstractions = identify_generalizations(episode_log)
    theory = construct_theory(abstractions)
    candidates = generate_simulators(theory)["noise_candidates"]
    outcomes = [_simulate(noise) for noise in candidates]
    return {
        "abstractions": abstractions,
        "theory": theory,
        "meta": meta_validate(outcomes),
    }


# ============================================================
# Reflection
# ============================================================

# Prompt template for post-episode reflection. The "{log}" placeholder is
# filled with the episode transcript via str.format; reflect_on_episode
# rejects templates that do not contain it.
REFLECT_PROMPT = """You are a meta-cognitive agent.

Below is a transcript of your actions, states, and outcomes during the task.

Please:
1. Identify at least 2 failure points or suboptimal decisions.
2. Hypothesize why these failures occurred.
3. Propose improvements to policy, memory use, or planning strategy.

Transcript:
{log}
"""

def reflect_on_episode(agent_log: str, prompt_template: str) -> Dict[str, Any]:
    """Produce a structured reflection on an episode transcript.

    The transcript is inserted into ``prompt_template`` and handed to a
    built-in heuristic that stands in for a language model. Its text answer
    is then parsed into sections.

    Args:
        agent_log: The episode transcript, one event per line.
        prompt_template: A ``str.format`` template containing ``{log}``.

    Returns:
        A dict with list-valued ``failures``, ``hypotheses`` and
        ``improvements``, plus ``raw`` holding the unparsed answer.

    Raises:
        ValueError: If ``prompt_template`` has no ``{log}`` placeholder.
    """
    if "{log}" not in prompt_template:
        raise ValueError("prompt_template must contain '{log}'")
    prompt = prompt_template.format(log=agent_log)

    failure_markers = ("fail", "error", "dead-end", "stale")
    section_headers = {
        "failures:": "failures",
        "hypotheses:": "hypotheses",
        "improvements:": "improvements",
    }

    def _default_llm(p: str) -> str:
        # Takes the full prompt like a real model call would, but looks for
        # failure lines in the transcript only. Scanning the prompt would
        # match its own instructions (e.g. "failure points") and report them
        # as failures.
        lines = [ln.strip() for ln in agent_log.splitlines() if ln.strip()]
        failures = [ln for ln in lines if any(w in ln.lower() for w in failure_markers)]
        if not failures:
            failures = ["No explicit failure lines detected; reward curve suggests suboptimal exploration."]
        hyp = [
            "Insufficient situational memory caused plan drift.",
            "Under-exploration due to premature exploitation."
        ]
        imps = [
            "Increase retrieval top-k and apply recency weighting in writes.",
            "Boost epsilon temporarily after failures; decay when success stabilizes."
        ]
        return (
            "Failures:\n- " + "\n- ".join(failures[:3]) + "\n\n"
            "Hypotheses:\n- " + "\n- ".join(hyp) + "\n\n"
            "Improvements:\n- " + "\n- ".join(imps)
        )

    def _strip_bullet(text: str) -> str:
        # Remove exactly one leading "- " marker; lstrip("- ") would also eat
        # a minus sign that belongs to the content (e.g. "- -1 reward").
        return text[2:].strip() if text.startswith("- ") else text

    def _default_parse(s: str) -> Dict[str, Any]:
        out = {"failures": [], "hypotheses": [], "improvements": [], "raw": s}
        section = None
        for ln in s.splitlines():
            line = ln.strip()
            if not line:
                continue
            lowered = line.lower()
            header = next(
                (name for prefix, name in section_headers.items() if lowered.startswith(prefix)),
                None,
            )
            if header is not None:
                section = header
            elif section is not None:
                # Text before the first header is dropped rather than being
                # carried into whichever section happens to come first.
                out[section].append(_strip_bullet(line))
        return out

    raw = _default_llm(prompt)
    return _default_parse(raw)


# ============================================================
# Encoder helpers
# ============================================================

def embed_obs(obs: torch.Tensor, key_dim: int) -> torch.Tensor:
    """Expand an observation into a ``key_dim``-long feature vector.

    The features are the raw observation followed by ``sin(k·obs)`` and
    ``cos(k·obs)`` for ``k = 1 .. max(1, key_dim // 4)``, concatenated along
    the last dimension. The result is zero-padded if too short and then cut
    to the first ``key_dim`` entries.
    """
    n_freqs = max(1, key_dim // 4)
    parts = [obs]
    for freq in range(1, n_freqs + 1):
        parts.extend((torch.sin(freq * obs), torch.cos(freq * obs)))
    flat = torch.cat(parts, dim=-1)

    shortfall = key_dim - flat.numel()
    if shortfall > 0:
        padding = torch.zeros(shortfall, device=obs.device)
        flat = torch.cat([flat, padding], dim=-1)
    return flat[:key_dim]

def value_from_step(reward: float, action: int, dist: float, value_dim: int, device: torch.device) -> torch.Tensor:
    """Pack one transition into a ``value_dim``-long memory value.

    Layout: ``[reward, action, sign of dist (+1 when dist >= 0), 1.0, 0, ...]``.
    The constant 1.0 in slot 3 is written only when ``value_dim > 3``.
    """
    packed = torch.zeros(value_dim, device=device)
    direction = 1.0 if dist >= 0 else -1.0
    for slot, entry in enumerate((float(reward), float(action), direction)):
        packed[slot] = entry
    if value_dim > 3:
        packed[3] = 1.0
    return packed


# ============================================================
# Main loop
# ============================================================

def run(cfg: Config):
    """Train the agent on LineSeek for ``cfg.episodes`` episodes, printing progress.

    Each step reads the episodic memory, picks an action (policy or a simple
    sign heuristic), steps the environment, does one Q-learning update and
    writes the transition to memory. After each episode the agent either
    reflects and boosts exploration (failure) or decays exploration (success),
    then a planning cycle retunes the memory temperature.
    """
    device = device_or_cpu(cfg.device)
    set_seed(cfg.seed)
    print(f"[INFO] Device={device}, Episodes={cfg.episodes}, Steps/Ep={cfg.steps_per_episode}")

    # Env
    env_cfg = LineSeekConfig(
        line_length=cfg.line_length, obs_noise_std=cfg.obs_noise_std,
        success_reward=cfg.success_reward, step_penalty=cfg.step_penalty,
        steps_per_episode=cfg.steps_per_episode,
    )
    env = LineSeekEnv(env_cfg, device=device)

    # Policy
    # Observations have 2 entries; the 3 actions map to moves of -1, 0, +1.
    pol_cfg = PolicyConfig(
        obs_dim=2, hidden_dim=cfg.hidden_dim, actions=3, lr=cfg.lr,
        gamma=cfg.gamma, epsilon=cfg.epsilon_start,
        epsilon_min=cfg.epsilon_min, epsilon_decay=cfg.epsilon_decay,
    )
    policy = AgentPolicy(pol_cfg, device=device)
    print(f"[INFO] Epsilon start={policy.epsilon:.3f}")

    # Memory
    mem = EntangledEpisodicMemory(
        slots=cfg.mem_slots, key_dim=cfg.key_dim, value_dim=cfg.value_dim,
        temperature=cfg.mem_temperature, ema=cfg.mem_ema,
        householder_layers=cfg.mem_householder_layers, device=device
    )

    for ep in range(1, cfg.episodes + 1):
        # A distinct seed per episode; LineSeekEnv.reset reseeds the global
        # random and torch generators with it.
        obs = env.reset(seed=cfg.seed + ep)
        total_r = 0.0
        agent_log_lines: List[str] = []
        dists: List[int] = []
        success = False

        for t in range(cfg.steps_per_episode):
            key = embed_obs(obs, cfg.key_dim).to(device)
            # NOTE: the retrieved value and weights are not used further on;
            # the read does not influence the chosen action.
            _, weights = mem.read(key, topk=8, return_weights=True)

            # Heuristic move: step in the direction of the (noisy) observed distance.
            suggested = int(torch.sign(obs[1]).item())
            suggested = max(-1, min(1, suggested))

            # Follow the learned policy with probability 0.7, the heuristic otherwise.
            act_from_policy = policy.select_action(obs)
            action = act_from_policy if torch.rand(()) < 0.7 else suggested

            next_obs, reward, done, info = env.step(action)
            total_r += reward; dists.append(int(info["dist"]))

            loss = policy.update(obs, action, reward, next_obs, done)

            # Store the transition under the key of the observation it started from.
            v = value_from_step(reward, action, float(info["dist"]), cfg.value_dim, device=device)
            mem.write(key, v, strategy="nearest")

            agent_log_lines.append(
                f"t={t:02d} pos={float(obs[0]):+.2f} dist={int(info['dist']):+d} act={action:+d} r={reward:+.2f} loss={loss:.3f}"
            )

            obs = next_obs
            # done is also set at the step limit, so success is read from info.
            if done: success = info.get("success", False); break

        status = "SUCCESS" if success else "FAIL"
        print(f"[EP {ep:02d}] {status} total_reward={total_r:+.2f} steps={len(agent_log_lines):d} epsilon={policy.epsilon:.3f}")

        # Reflection & curriculum
        # Failure: reflect on the transcript and explore more. Success: explore less.
        if not success:
            agent_log = "\n".join(agent_log_lines) + "\nOutcome: Goal not achieved; time limit exceeded."
            reflection = reflect_on_episode(agent_log, REFLECT_PROMPT)
            print("[REFLECTION] Failures:", reflection.get("failures", [])[:2])
            print("[REFLECTION] Improvements:", reflection.get("improvements", [])[:2])
            policy.boost_epsilon(cfg.fail_recovery_eps_boost)
        else:
            policy.decay_epsilon()

        # Planning cycle (meta)
        episode_log = {"dists": dists, "success": success, "total_reward": total_r}
        meta_out = planning_cycle(mem, episode_log)
        best_sim = meta_out["meta"].get("best")
        if best_sim:
            # Temperature follows the best simulated noise level, clamped to [0.05, 0.5].
            mem.temperature = max(0.05, min(0.5, 0.1 + 0.5 * (best_sim["noise"])))
        if cfg.verbose:
            print(f"[PLAN] abstractions={meta_out['abstractions']} | theory={meta_out['theory']} | mem.tau={mem.temperature:.3f}")

    print("\n=== SUMMARY ===")
    print(f"Config: {asdict(cfg)}")
    print(f"Final epsilon: {policy.epsilon:.3f}")
    print("[DONE]")

def build_parser():
    """Create the command-line parser for the demo.

    Every option is optional. ``--device`` accepts ``cpu`` or ``cuda``.
    ``--verbose`` takes a value and is true only for ``1``, ``true``, ``yes``
    or ``y`` (case-insensitive).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    import argparse

    def _parse_bool(text) -> bool:
        # Anything outside the accepted spellings counts as False.
        return str(text).lower() in {"1", "true", "yes", "y"}

    p = argparse.ArgumentParser(
        add_help=True,
        description="UnifiedAI single-file demo (Notebook/CLI friendly)"
    )
    p.add_argument("--episodes", type=int, default=10)
    p.add_argument("--steps_per_episode", type=int, default=50)
    # A tuple rather than a set, so the choices appear in a fixed order in
    # help and error messages.
    p.add_argument("--device", choices=("cpu", "cuda"), default="cpu")
    p.add_argument("--epsilon_start", type=float, default=0.25)
    p.add_argument("--epsilon_min", type=float, default=0.02)
    p.add_argument("--epsilon_decay", type=float, default=0.98)
    p.add_argument("--lr", type=float, default=1e-3)
    p.add_argument("--hidden_dim", type=int, default=64)
    p.add_argument("--mem_slots", type=int, default=256)
    p.add_argument("--key_dim", type=int, default=32)
    p.add_argument("--value_dim", type=int, default=16)
    p.add_argument("--mem_temperature", type=float, default=0.2)
    p.add_argument("--mem_ema", type=float, default=0.2)
    p.add_argument("--mem_householder_layers", type=int, default=1)
    p.add_argument("--line_length", type=int, default=31)
    p.add_argument("--obs_noise_std", type=float, default=0.1)
    p.add_argument("--success_reward", type=float, default=10.0)
    p.add_argument("--step_penalty", type=float, default=-0.05)
    p.add_argument("--fail_recovery_eps_boost", type=float, default=0.15)
    p.add_argument("--seed", type=int, default=7)
    p.add_argument("--verbose", type=_parse_bool, default=True)
    return p


def main():
    """Parse command-line options, build the Config and start the run.

    Unrecognised arguments are ignored rather than rejected.
    """
    import os

    # parse_known_args tolerates arguments this parser does not define.
    args, _unrecognised = build_parser().parse_known_args()

    # Config fields that are filled from the command line.
    cli_fields = (
        "seed",
        "device",
        "episodes",
        "steps_per_episode",
        "epsilon_start",
        "epsilon_min",
        "epsilon_decay",
        "lr",
        "hidden_dim",
        "mem_slots",
        "key_dim",
        "value_dim",
        "mem_temperature",
        "mem_ema",
        "mem_householder_layers",
        "line_length",
        "obs_noise_std",
        "success_reward",
        "step_penalty",
        "fail_recovery_eps_boost",
        "verbose",
    )
    cfg = Config(**{name: getattr(args, name) for name in cli_fields})

    # Only sets the variable when it is not already defined.
    # NOTE(review): set after interpreter start-up, so this presumably affects
    # only child processes, not hashing in this process; confirm intent.
    os.environ.setdefault("PYTHONHASHSEED", "0")
    run(cfg)


# Run the command-line entry point only when this file is executed as a
# script, not when it is imported.
if __name__ == "__main__":
    main()