<a href="https://colab.research.google.com/github/OneFineStarstuff/Pinn/blob/main/MyAGIAgent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Single-file agent with a shared representation for regression and multimodal matching.
# - Trainable PyTorch projections align text and image into a shared space.
# - Regression uses a numeric -> shared -> scalar head.
# - Grid/Symbol use lightweight heuristics to keep the harness runnable.
# - Safe to drop into your harness; includes minimal fallbacks if base classes are missing.

import os, time, random, math
from dataclasses import dataclass
import numpy as np

# Optional PyTorch import: record whether it worked instead of failing here.
try:
    import torch
    import torch.nn as nn
    import torch.nn.functional as F
except Exception:
    TORCH_AVAILABLE = False
else:
    TORCH_AVAILABLE = True

# Seed every RNG that is in use.
SEED = 1234
random.seed(SEED)
np.random.seed(SEED)
if TORCH_AVAILABLE:
    torch.manual_seed(SEED)

# Use CUDA only when torch imported and reports a usable GPU.
if TORCH_AVAILABLE and torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"

# Run identifier derived from the wall-clock second at import time.
RUN_ID = "run_" + str(int(time.time()))

# ---------------------------------------------------------------------
# Harness base classes (only defined if they don't already exist)
# ---------------------------------------------------------------------
try:
    AgentAdapter  # type: ignore[name-defined]
except NameError:
    @dataclass
    class AgentConfig:
        name: str = "MyAGIAgent"
        use_world_model: bool = False
        use_multimodal_encoder: bool = True
        mc_dropout_passes: int = 0
        reflection_steps: int = 1
        notes: str = ""

    class AgentAdapter:
        def __init__(self, cfg: AgentConfig):
            self.cfg = cfg
        def act(self, obs: dict, task_id: str, step: int, state: dict | None):
            raise NotImplementedError
        def learn(self, batch: dict, task_id: str) -> dict:
            return {"learned": False}
        def reflect(self, logs: list[dict]) -> dict:
            return {"notes": "no-op", "patches": {}}
        def encode(self, modality: str, data):
            raise NotImplementedError
        def imagine(self, state: dict, n_steps: int = 5):
            return []

# ---------------------------------------------------------------------
# Core AGI agent with shared encoder
# ---------------------------------------------------------------------
class MyAGIAgent(nn.Module):
    """
    Shared-representation agent.

    Every modality is projected to ``shared_dim`` and then passed through one
    common trunk:
      - Text    -> L2-normalized bag-of-chars (txt_dim) -> Linear -> trunk
      - Image   -> bilinear resize to img_hw x img_hw, channel mean, flatten,
                   L2-normalize -> Linear -> trunk
      - Numeric -> [B, 1] column -> Linear -> trunk
    Heads: ``reg_head`` (shared -> 1) is used for regression; ``policy_head``
    (shared -> 4) is constructed but not used by any method of this class.

    Input tensors are created on the device the module's parameters live on,
    so an instance keeps working after ``.to(...)``.
    """

    def __init__(self, shared_dim: int = 128, txt_dim: int = 64, img_hw: int = 32, eps: float = 0.1):
        super().__init__()
        self.shared_dim = shared_dim
        self.txt_dim = txt_dim
        self.img_hw = img_hw
        self.img_dim = img_hw * img_hw  # grayscale
        self.eps = eps  # stored only; no method of this class reads it

        # Shared MLP trunk (applied after modality-specific projection)
        self.trunk = nn.Sequential(
            nn.LayerNorm(shared_dim),
            nn.ReLU(),
            nn.Linear(shared_dim, shared_dim),
            nn.ReLU(),
        )

        # Modality projections
        self.txt_proj = nn.Linear(txt_dim, shared_dim)
        self.img_proj = nn.Linear(self.img_dim, shared_dim)
        self.num_proj = nn.Linear(1, shared_dim)

        # Heads
        self.policy_head = nn.Linear(shared_dim, 4)  # up/down/left/right
        self.reg_head = nn.Linear(shared_dim, 1)

        # Temperature for contrastive similarity, stored as log(10)
        self.logit_scale = nn.Parameter(torch.log(torch.tensor(10.0)))

        # Two optimizers so the tasks can be tuned separately. The trunk is
        # listed in both, so each optimizer keeps its own Adam state for it.
        trunk_params = list(self.trunk.parameters())
        self.reg_opt = torch.optim.Adam(
            list(self.num_proj.parameters()) + list(self.reg_head.parameters()) + trunk_params,
            lr=5e-3,
        )
        self.mm_opt = torch.optim.Adam(
            list(self.txt_proj.parameters())
            + list(self.img_proj.parameters())
            + trunk_params
            + [self.logit_scale],
            lr=1e-3,
        )

        # Char vocab for text featurization
        alphabet = "abcdefghijklmnopqrstuvwxyz0123456789_- "
        self.char2idx = {c: i + 1 for i, c in enumerate(alphabet)}  # 0 reserved

    # ---- Internal helpers ----
    def _device(self) -> torch.device:
        """Return the device the parameters currently live on."""
        return self.logit_scale.device

    def _scale(self) -> torch.Tensor:
        """Return the similarity temperature, clamped to [1, 100]."""
        return self.logit_scale.exp().clamp(1.0, 100.0)

    def _encode_stack(self, encode_fn, items) -> torch.Tensor:
        """Encode every item with ``encode_fn`` and stack the results to [K, D]."""
        return torch.stack([encode_fn(item) for item in items])

    def _scaled_similarities(self, query: torch.Tensor, candidates: torch.Tensor) -> torch.Tensor:
        """Return temperature-scaled cosine similarity of ``query`` [D] to each row of ``candidates`` [K, D]."""
        return F.cosine_similarity(candidates, query.unsqueeze(0), dim=1) * self._scale()

    def _best_match(self, query: torch.Tensor, encode_fn, candidates) -> int:
        """Return the index of the candidate most similar to ``query``.

        Raises:
            ValueError: if ``candidates`` is empty.
        """
        candidates = list(candidates)
        if not candidates:
            raise ValueError("attempt to get argmax of an empty sequence")
        sims = self._scaled_similarities(query, self._encode_stack(encode_fn, candidates))
        return int(np.argmax(sims.detach().cpu().numpy()))

    def _contrastive_step(self, query: torch.Tensor, candidates: torch.Tensor, target_idx: int) -> dict:
        """Run one cross-entropy update on ``mm_opt``; candidates act as the classes."""
        logits = self._scaled_similarities(query, candidates)  # [K]
        target = torch.tensor([int(target_idx)], device=logits.device)
        loss = F.cross_entropy(logits.unsqueeze(0), target)
        self.mm_opt.zero_grad(set_to_none=True)
        loss.backward()
        self.mm_opt.step()
        return {"loss": float(loss.item())}

    # ---- Featurizers ----
    def featurize_text(self, s: str) -> torch.Tensor:
        """Return an L2-normalized character-count vector of length ``txt_dim``.

        Characters are lower-cased; anything outside the vocabulary is counted
        in slot 0. ``None`` or an empty string yields the zero vector.
        """
        vec = np.zeros(self.txt_dim, dtype=np.float32)
        for ch in s or "":
            vec[self.char2idx.get(ch.lower(), 0) % self.txt_dim] += 1.0
        vec = vec / (np.linalg.norm(vec) + 1e-8)
        return torch.from_numpy(vec).to(self._device())

    def featurize_image(self, img: np.ndarray) -> torch.Tensor:
        """Return an L2-normalized grayscale vector of length ``img_dim``.

        Accepts HxW or HxWxC arrays. Any other rank is flattened and
        zero-padded or truncated to ``img_dim`` instead of being resized.
        """
        # Convert to a contiguous float32 copy first: torch.from_numpy rejects
        # negatively-strided views (e.g. a flipped image) and some dtypes.
        arr = torch.from_numpy(np.ascontiguousarray(img, dtype=np.float32))
        if arr.ndim == 2:
            arr = arr.unsqueeze(0)  # 1,H,W
        elif arr.ndim == 3:
            if arr.shape[-1] in (1, 3):  # H,W,C -> C,H,W
                arr = arr.permute(2, 0, 1)
            else:
                # Unknown last dim, collapse to 1 channel
                arr = arr.mean(dim=-1, keepdim=True).permute(2, 0, 1)
        else:
            # Fallback: flatten, then pad/trim to the expected length
            flat = arr.flatten()
            if flat.numel() < self.img_dim:
                flat = F.pad(flat, (0, self.img_dim - flat.numel()))
            else:
                flat = flat[: self.img_dim]
            flat = flat / (flat.norm() + 1e-8)
            return flat.to(self._device())

        arr = arr.unsqueeze(0)  # N=1,C,H,W
        with torch.no_grad():
            arr = F.interpolate(arr, size=(self.img_hw, self.img_hw), mode="bilinear", align_corners=False)
        vec = arr.mean(dim=1).squeeze(0).flatten()  # channel mean -> H,W -> H*W
        vec = vec / (vec.norm() + 1e-8)
        return vec.to(self._device())

    def featurize_num(self, x: np.ndarray) -> torch.Tensor:
        """Return ``x`` as a float32 tensor; non-2-D input becomes a [B, 1] column."""
        arr = np.ascontiguousarray(x, dtype=np.float32)
        if arr.ndim != 2:
            # Scalars, 1-D arrays and lists are treated as B separate samples.
            arr = arr.reshape(-1, 1)
        return torch.from_numpy(arr).to(self._device())

    # ---- Encoders to shared space ----
    def encode_text_shared(self, s: str) -> torch.Tensor:
        """Embed a string into the shared space; returns a [D] tensor."""
        return self.trunk(self.txt_proj(self.featurize_text(s)))

    def encode_img_shared(self, img: np.ndarray) -> torch.Tensor:
        """Embed an image into the shared space; returns a [D] tensor."""
        return self.trunk(self.img_proj(self.featurize_image(img)))

    def encode_num_shared(self, x_batch: np.ndarray) -> torch.Tensor:
        """Embed a batch of scalars into the shared space; returns [B, D]."""
        return self.trunk(self.num_proj(self.featurize_num(x_batch)))

    # ---- Heads ----
    @torch.no_grad()
    def act_regress(self, x_batch: np.ndarray) -> np.ndarray:
        """Predict one scalar per input row; returns a [B, 1] ndarray."""
        self.eval()
        y = self.reg_head(self.encode_num_shared(x_batch))  # [B, 1]
        return y.cpu().numpy()

    @torch.no_grad()
    def act_multimodal_txt2img(self, text: str, images: list[np.ndarray]) -> int:
        """Return the index of the image most similar to ``text``.

        Raises:
            ValueError: if ``images`` is empty.
        """
        self.eval()
        return self._best_match(self.encode_text_shared(text), self.encode_img_shared, images)

    @torch.no_grad()
    def act_multimodal_img2txt(self, image: np.ndarray, texts: list[str]) -> int:
        """Return the index of the text most similar to ``image``.

        Raises:
            ValueError: if ``texts`` is empty.
        """
        self.eval()
        return self._best_match(self.encode_img_shared(image), self.encode_text_shared, texts)

    # ---- Learning steps ----
    def step_regression(self, x: np.ndarray, y: np.ndarray) -> dict:
        """Run one MSE update on ``reg_opt``; returns ``{"loss": float}``.

        ``y`` may be [B, 1] or [B]; it is flattened to line up with the [B]
        predictions.
        """
        self.train()
        pred = self.reg_head(self.encode_num_shared(x)).squeeze(-1)  # [B]
        # reshape(-1) rather than squeeze(-1): squeeze raises on a 1-D target
        # and collapses a single-element target to 0-d.
        target = torch.from_numpy(np.ascontiguousarray(y, dtype=np.float32).reshape(-1)).to(pred.device)
        loss = F.mse_loss(pred, target)
        self.reg_opt.zero_grad(set_to_none=True)
        loss.backward()
        self.reg_opt.step()
        return {"loss": float(loss.item())}

    def step_multimodal_txt2img(self, text: str, images: list[np.ndarray], target_idx: int) -> dict:
        """Run one update pulling ``text`` toward ``images[target_idx]``; returns ``{"loss": float}``."""
        self.train()
        query = self.encode_text_shared(text)                             # [D]
        candidates = self._encode_stack(self.encode_img_shared, images)   # [K,D]
        return self._contrastive_step(query, candidates, target_idx)

    def step_multimodal_img2txt(self, image: np.ndarray, texts: list[str], target_idx: int) -> dict:
        """Run one update pulling ``image`` toward ``texts[target_idx]``; returns ``{"loss": float}``."""
        self.train()
        query = self.encode_img_shared(image)                             # [D]
        candidates = self._encode_stack(self.encode_text_shared, texts)   # [K,D]
        return self._contrastive_step(query, candidates, target_idx)

# ---------------------------------------------------------------------
# Adapter wiring to your harness interfaces
# ---------------------------------------------------------------------
class MyAGIAgentAdapter(AgentAdapter):
    """Adapter that routes harness calls to a ``MyAGIAgent`` by task-id prefix.

    ``grid*`` and ``symbol*`` tasks use heuristics; ``regress*`` and
    ``multimodal*`` tasks are delegated to the wrapped agent.
    """

    # Position delta per action name.
    # NOTE(review): presumably (row, col) with "up" decreasing the first
    # coordinate - confirm against the grid environment.
    _GRID_MOVES = {"up": (-1, 0), "down": (1, 0), "left": (0, -1), "right": (0, 1)}

    def __init__(self, cfg: "AgentConfig"):
        """Build the wrapped agent on ``DEVICE``.

        Raises:
            RuntimeError: if PyTorch could not be imported.
        """
        super().__init__(cfg)
        if not TORCH_AVAILABLE:
            raise RuntimeError("This agent requires PyTorch.")
        self.agent = MyAGIAgent(shared_dim=128, txt_dim=64, img_hw=32, eps=0.1).to(DEVICE)
        self.eps = 0.1  # used for grid heuristic exploration

    # ---- Per-task action helpers ----
    def _act_grid(self, obs: dict):
        """Pick a grid action: random with probability ``eps``, else the one closest to the goal."""
        acts = obs["action_space"]
        if len(acts) == 0:
            # Nothing to choose from; report "no action" like the symbol task does.
            return None
        if random.random() < self.eps:
            return random.choice(acts)

        pos = obs.get("pos")
        goal = obs.get("goal")

        def distance_after(act):
            # Without a position or a goal every action ties at 0, so min()
            # below returns the first action instead of raising.
            if pos is None or goal is None:
                return 0.0
            d0, d1 = self._GRID_MOVES.get(act, (0, 0))
            return abs(pos[0] + d0 - goal[0]) + abs(pos[1] + d1 - goal[1])

        return min(acts, key=distance_after)

    def _act_symbol(self, obs: dict):
        """Pick the applicable rewrite rule whose result scores closest to the target.

        The score is the length difference plus the positional mismatches over
        the common prefix length. If no rule applies, a random rule is
        returned (``None`` when there are no rules).
        """
        rules = obs["rules"]
        cur = obs["string"]
        tgt = obs["target"]
        best = None
        best_score = math.inf
        for (lhs, rhs) in rules:
            idx = cur.find(lhs)
            if idx < 0:
                continue
            new_s = cur[:idx] + rhs + cur[idx + len(lhs):]
            score = abs(len(new_s) - len(tgt)) + sum(1 for a, b in zip(new_s, tgt) if a != b)
            if score < best_score:
                best = (lhs, rhs)
                best_score = score
        if best is None:
            best = random.choice(rules) if rules else None
        return best

    def _act_multimodal(self, obs: dict) -> int:
        """Return the matched index; any mode other than "txt2img" is treated as img2txt."""
        if obs.get("mode") == "txt2img":
            return int(self.agent.act_multimodal_txt2img(obs["text"], obs["images"]))
        return int(self.agent.act_multimodal_img2txt(obs["image"], obs["texts"]))

    def act(self, obs: dict, task_id: str, step: int, state: dict | None):
        """Return ``(action, state, info)`` for the task selected by the ``task_id`` prefix.

        ``state`` is passed through (``{}`` when falsy) and ``info`` is always
        ``{}``. Unrecognized task ids yield a ``None`` action.
        """
        next_state = state or {}

        # Grid: choose from provided action_space, goal-directed with epsilon
        if task_id.startswith("grid"):
            return self._act_grid(obs), next_state, {}

        # Symbol: pick rule that best reduces proxy distance to target
        if task_id.startswith("symbol"):
            return self._act_symbol(obs), next_state, {}

        # Regression: obs["x"] -> predictions [B,1] ndarray
        if task_id.startswith("regress"):
            return self.agent.act_regress(obs["x"]), next_state, {}

        # Multimodal matching: return plain int index
        if task_id.startswith("multimodal"):
            return self._act_multimodal(obs), next_state, {}

        return None, next_state, {}

    def learn(self, batch: dict, task_id: str) -> dict:
        """Run one training step for regression or multimodal tasks.

        Returns the agent's ``{"loss": float}``, or ``{"loss": None}`` for
        task ids with no trainable component.
        """
        if task_id.startswith("regress"):
            return self.agent.step_regression(batch["x"], batch["y"])
        if task_id.startswith("multimodal"):
            if batch.get("mode") == "txt2img":
                return self.agent.step_multimodal_txt2img(batch["text"], batch["images"], batch["target_idx"])
            return self.agent.step_multimodal_img2txt(batch["image"], batch["texts"], batch["target_idx"])
        return {"loss": None}

    def reflect(self, logs: list[dict]) -> dict:
        """Raise exploration by 0.05 (capped at 0.3) when over half the logged runs failed.

        Only log entries containing a "success" key are counted; epsilon is
        never lowered here.
        """
        fails = [1.0 if r.get("success") == 0 else 0.0 for r in logs if "success" in r]
        fail_rate = float(np.mean(fails)) if fails else 0.0
        if fail_rate > 0.5:
            self.eps = min(0.3, self.eps + 0.05)
            note = f"Increased epsilon to {self.eps:.2f} after fail_rate={fail_rate:.2f}"
        else:
            note = f"No change; fail_rate={fail_rate:.2f}"
        return {"notes": note, "patches": {"eps": self.eps}}

    def encode(self, modality: str, data):
        """Return the shared-space embedding of ``data`` as a NumPy array.

        Raises:
            ValueError: if ``modality`` is neither "text" nor "image".
        """
        if modality == "text":
            with torch.no_grad():
                v = self.agent.encode_text_shared(data)
                return v.detach().cpu().numpy()
        if modality == "image":
            with torch.no_grad():
                v = self.agent.encode_img_shared(data)
                return v.detach().cpu().numpy()
        raise ValueError(f"Unknown modality: {modality}")

    def imagine(self, state: dict, n_steps: int = 5):
        """Placeholder: no world model yet, so the rollout is always empty."""
        return []

# ---------------------------------------------------------------------
# Optional: run if your harness is available; otherwise just define agent.
# ---------------------------------------------------------------------
if __name__ == "__main__":
    # Run the full benchmark only when a harness has put `run_all` in scope.
    if "run_all" in globals():
        cfg = AgentConfig(
            name="MyAGIAgent",
            use_world_model=False,
            use_multimodal_encoder=True,
            notes="shared-encoder, trainable multimodal projections"
        )
        agent = MyAGIAgentAdapter(cfg)
        out = run_all(agent)  # expected to return (df, refl_notes, sym_test, grid_test, sin_test)
        # The dashboard is best-effort: a failure must not abort the run, but
        # the reason is printed so a missing `dashboard` or bad output is visible.
        try:
            df = out[0]
            dashboard(df)
        except Exception as exc:
            print(f"Dashboard skipped ({type(exc).__name__}: {exc})")
        print("Run complete:", RUN_ID)
    else:
        print("Defined MyAGIAgentAdapter. Integrate with your harness via: df, refl_notes, sym_test, grid_test, sin_test = run_all(MyAGIAgentAdapter(AgentConfig(...))).")