<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/unified_ai_policy_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# unified_ai/policy.py
from __future__ import annotations

import math
from typing import Dict

import torch
import torch.nn as nn

class TextualPolicyToAction:
    """Map action strings to dense action vectors through an embedding table.

    Each known action name is associated with an integer index; the index
    selects a row of an ``nn.Embedding`` whose weights are initialised from a
    normal distribution with ``std=0.2``.

    Args:
        action_dim: Width of the embedding table rows.
        actions: Optional mapping from action name to embedding index. When
            omitted (or empty) a default set of eight actions is used.

    Note:
        This class is a plain Python object, not an ``nn.Module``; the
        embedding is reachable as ``self.emb`` for callers that want to
        optimise or move it.
    """

    def __init__(self, action_dim: int = 32, actions: Dict[str, int] | None = None):
        self.actions = actions or {
            "move_left": 0,
            "move_right": 1,
            "move_up": 2,
            "move_down": 3,
            "pick": 4,
            "place": 5,
            "wait": 6,
            "scan": 7,
        }
        # Size the table by the largest index so every configured id is valid.
        self.n = max(self.actions.values()) + 1
        self.emb = nn.Embedding(self.n, action_dim)
        with torch.no_grad():
            torch.nn.init.normal_(self.emb.weight, std=0.2)
        # Projection matrices keyed by (embedding_dim, requested_dim). They are
        # created once per size pair so repeated calls give the same mapping.
        self._proj = {}

    def __call__(self, action_text: str, device: torch.device, action_dim: int) -> torch.Tensor:
        """Return the dense vector for ``action_text`` on ``device``.

        Args:
            action_text: Action name to look up. Names missing from the
                mapping fall back to index 0.
            device: Device the returned tensor should live on.
            action_dim: Requested size of the returned vector. If it differs
                from the embedding width, the embedding is multiplied by a
                fixed (non-trainable) random projection of matching shape.

        Returns:
            A 1-D tensor of length ``action_dim``.
        """
        # Look the row up on the table's own device; building the index on the
        # caller's device fails when the two differ.
        idx = torch.tensor(
            self.actions.get(action_text, 0),
            device=self.emb.weight.device,
            dtype=torch.long,
        )
        vec = self.emb(idx).to(device)  # (D,)
        emb_dim = vec.size(-1)
        if emb_dim == action_dim:
            return vec
        W = self._projection(emb_dim, action_dim).to(device=device, dtype=vec.dtype)
        return vec @ W

    def _projection(self, in_dim: int, out_dim: int) -> torch.Tensor:
        """Return the cached ``(in_dim, out_dim)`` projection, creating it on first use."""
        key = (in_dim, out_dim)
        W = self._proj.get(key)
        if W is None:
            W = torch.empty(in_dim, out_dim, device=self.emb.weight.device)
            nn.init.kaiming_uniform_(W, a=math.sqrt(5))
            self._proj[key] = W
        return W


class SimpleReward:
    """Reward = -||h - g||^2 where g is a learnable or provided goal state."""
    def __init__(self, latent: int = 256):
        self.goal = torch.zeros(latent)

    def set_goal(self, g: torch.Tensor):
        self.goal = g.detach()

    def __call__(self, h: torch.Tensor) -> torch.Tensor:
        g = self.goal.to(h.device).expand_as(h)
        return -((h - g) ** 2).sum(dim=-1)