<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/Grid.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, Iterable, Iterator, List, Optional, Protocol, Tuple
from collections import deque, Counter

# =========================
# Episode and report types
# =========================

@dataclass
class Episode:
    """One remembered interaction: an encoded input and the outcome observed for it."""
    # Encoded situation/action; run_simulation_and_training stores input_label() strings here.
    input: str
    # Label of what actually happened; run_simulation_and_training stores outcome_label() strings here.
    outcome: str

@dataclass
class Contradiction:
    """An episode whose recorded outcome disagreed with the world model's prediction."""
    # The remembered episode that was checked.
    episode: Episode
    # What the world model's simulate() returned for the episode's input.
    prediction: str
    # Similarity between prediction and recorded outcome (below the updater's threshold).
    score: float

@dataclass
class TrainingReport:
    """Summary returned by ReflectiveUpdater.training_step()."""
    # Running count of training_step() calls on the updater, including this one.
    step: int
    # Number of episodes scored in this step.
    processed: int
    # Number of those episodes whose score fell below the threshold.
    contradictory: int
    # Threshold that was in effect while scoring.
    threshold: float
    # Mean similarity score over all processed episodes (0.0 when none).
    avg_score_all: float
    # Mean similarity score over the contradictory episodes only (0.0 when none).
    avg_score_contradictions: float


# =========================
# Scheduler
# =========================

class LinearThresholdScheduler:
    """Interpolate a threshold linearly from ``start`` to ``end``.

    The value moves along the line once per ``step()`` call and stays at
    ``end`` after ``total_steps`` steps have been taken.
    """

    def __init__(self, start: float, end: float, total_steps: int):
        """Store the schedule endpoints.

        Raises:
            ValueError: if ``total_steps`` is not strictly positive.
        """
        if total_steps <= 0:
            raise ValueError("total_steps must be > 0")
        self.step_idx = 0
        self.total = total_steps
        self.start, self.end = start, end

    def value(self) -> float:
        """Return the threshold for the current step index."""
        # Clamp progress so the schedule plateaus at ``end``.
        progress = self.step_idx if self.step_idx < self.total else self.total
        alpha = progress / self.total
        return (1 - alpha) * self.start + alpha * self.end

    def step(self) -> None:
        """Advance the schedule by one step."""
        self.step_idx = self.step_idx + 1


# =========================
# Reflective updater
# =========================

class ReflectiveUpdater:
    """Check remembered outcomes against world-model predictions and revise the models.

    An episode counts as contradictory when ``similarity_fn(prediction, outcome)``
    is strictly below ``threshold``, where ``prediction`` is what the world
    model's ``simulate`` returns for the episode's input.
    """

    def __init__(
        self,
        episodic_memory: "EpisodicMemory",
        world_model: "UpdatableWorldModel",
        self_model: "UpdatableSelfModel",
        similarity_fn,
        threshold: float = 0.8,
    ):
        self.mem, self.world, self.self_model = episodic_memory, world_model, self_model
        self.similarity = similarity_fn
        self.threshold = threshold
        # Number of training_step() calls completed so far.
        self._train_step = 0

    def _chunk(self, it: Iterable[Episode], size: int) -> Iterator[List[Episode]]:
        """Yield consecutive lists of up to ``size`` items drawn from ``it``."""
        pending: List[Episode] = []
        for item in it:
            pending.append(item)
            if len(pending) < size:
                continue
            yield pending
            pending = []
        # Emit whatever is left over as a final, shorter batch.
        if pending:
            yield pending

    def _remembered(self, cap: Optional[int]) -> List[Episode]:
        """Return the memory's episodes, truncated to the first ``cap`` when given."""
        remembered = list(self.mem.recent())
        return remembered if cap is None else remembered[:cap]

    def _scored(self, episodes: List[Episode], batch_size: int) -> Iterator[Tuple[Episode, str, float]]:
        """Lazily yield ``(episode, prediction, score)`` for each episode, batch by batch."""
        for group in self._chunk(episodes, batch_size):
            for ep in group:
                guess = self.world.simulate(ep.input)
                yield ep, guess, self.similarity(guess, ep.outcome)

    @staticmethod
    def _mean(values: List[float]) -> float:
        """Arithmetic mean of ``values``; 0.0 for an empty list."""
        return sum(values) / len(values) if values else 0.0

    def detect_contradictions_detailed(self, limit: Optional[int] = None, batch_size: int = 128) -> List[Contradiction]:
        """Return a Contradiction for every remembered episode scoring below the threshold.

        ``limit`` keeps only the first ``limit`` episodes returned by the memory;
        ``batch_size`` controls how the episodes are chunked while scoring.
        """
        episodes = self._remembered(limit)
        return [
            Contradiction(ep, guess, score)
            for ep, guess, score in self._scored(episodes, batch_size)
            if score < self.threshold
        ]

    def revise_model_batched(self, batch_size: int = 128) -> int:
        """Feed all contradictory episodes to both models; return how many were found."""
        found = self.detect_contradictions_detailed(limit=None, batch_size=batch_size)
        if not found:
            return 0
        offending = [item.episode for item in found]
        self.world.update(offending)
        self.self_model.adapt(offending)
        return len(found)

    def revise_model(self) -> int:
        """Run revise_model_batched() with its default batch size."""
        return self.revise_model_batched()

    def training_step(
        self,
        max_episodes: Optional[int] = None,
        batch_size: int = 128,
        scheduler: Optional[LinearThresholdScheduler] = None,
    ) -> TrainingReport:
        """Score episodes, update both models on contradictions, and report statistics.

        When a scheduler is supplied, its current value replaces ``self.threshold``
        before scoring and the scheduler is advanced once afterwards.
        """
        use_schedule = scheduler is not None
        if use_schedule:
            self.threshold = scheduler.value()

        episodes = self._remembered(max_episodes)

        every_score: List[float] = []
        flagged: List[Episode] = []
        flagged_scores: List[float] = []
        for ep, _guess, score in self._scored(episodes, batch_size):
            every_score.append(score)
            if score < self.threshold:
                flagged.append(ep)
                flagged_scores.append(score)

        if flagged:
            self.world.update(flagged)
            self.self_model.adapt(flagged)

        self._train_step += 1
        if use_schedule:
            scheduler.step()

        return TrainingReport(
            step=self._train_step,
            processed=len(episodes),
            contradictory=len(flagged),
            threshold=self.threshold,
            avg_score_all=self._mean(every_score),
            avg_score_contradictions=self._mean(flagged_scores),
        )


# =========================
# Protocols for models
# =========================

class UpdatableWorldModel(Protocol):
    # Structural interface for the world model consumed by ReflectiveUpdater.

    # Predict the outcome label for an encoded input.
    def simulate(self, x: Any) -> str: ...
    # Revise the model given episodes whose outcomes contradicted its predictions.
    def update(self, contradictory: List[Episode]) -> None: ...

class UpdatableSelfModel(Protocol):
    # Structural interface for the self model consumed by ReflectiveUpdater.

    # Called with the same contradictory episodes that were passed to the world model.
    def adapt(self, contradictory: List[Episode]) -> None: ...


# =========================
# Memory
# =========================

class EpisodicMemory:
    """Append-only store of episodes, kept in insertion order."""

    def __init__(self):
        self._episodes: List[Episode] = []

    def store(self, episode: Episode) -> None:
        """Add ``episode`` to the end of the memory."""
        self._episodes.append(episode)

    def recent(self) -> Iterable[Episode]:
        """Return a new list of all stored episodes, oldest first.

        The result is a copy, so mutating it does not affect the memory.
        """
        return self._episodes[:]


# =========================
# Grid world simulation
# =========================

def init_world(
    width: int = 5,
    height: int = 5,
    agent_pos: Tuple[int, int] = (0, 0),
    goal_pos: Tuple[int, int] = (3, 3),
    forbidden: Optional[List[Tuple[int, int]]] = None,
) -> Dict[str, Any]:
    """Build the initial grid-world state dictionary.

    Args:
        width: Number of columns in the grid.
        height: Number of rows in the grid.
        agent_pos: Starting ``(x, y)`` cell of the agent.
        goal_pos: ``(x, y)`` cell of the goal.
        forbidden: Cells the agent must not enter. ``None`` selects the
            default single obstacle at ``(1, 1)``; an explicit empty list
            means the world has no forbidden cells.

    Returns:
        A dict with keys ``width``, ``height``, ``agent_pos``, ``goal_pos``
        and ``forbidden`` (a set of cells).
    """
    # Test against None rather than truthiness: ``forbidden or [...]`` would
    # silently replace an explicit empty list with the default obstacle.
    cells = [(1, 1)] if forbidden is None else forbidden
    return {
        "width": width,
        "height": height,
        "agent_pos": agent_pos,
        "goal_pos": goal_pos,
        "forbidden": set(cells),
    }

def in_bounds(pos: Tuple[int, int], width: int, height: int) -> bool:
    """Return True when ``pos`` lies inside a ``width`` x ``height`` grid."""
    col, row = pos
    col_ok = 0 <= col < width
    row_ok = 0 <= row < height
    return col_ok and row_ok

def manhattan(a: Tuple[int, int], b: Tuple[int, int]) -> int:
    """Return the Manhattan (taxicab) distance between cells ``a`` and ``b``."""
    horizontal = a[0] - b[0]
    vertical = a[1] - b[1]
    return abs(horizontal) + abs(vertical)

def apply_physics(state: Dict[str, Any], actions: Dict[str, Any], rules: Dict[str, Any]) -> Dict[str, Any]:
    """Apply the agent's action to ``state`` and return the resulting state.

    Only ``{"type": "MOVE", "dir": ...}`` under the ``"agent"`` key has any
    effect. A move that stays inside the grid yields a new state dict with an
    updated ``agent_pos``; otherwise the incoming ``state`` object is returned
    as-is. ``rules`` is accepted but not consulted.
    """
    width, height = state["width"], state["height"]
    x, y = state["agent_pos"]

    command = actions.get("agent", {})
    if command.get("type") != "MOVE":
        return state

    heading = command.get("dir")
    offsets = (("UP", (0, -1)), ("DOWN", (0, 1)), ("LEFT", (-1, 0)), ("RIGHT", (1, 0)))
    # Unrecognised directions translate to a zero offset.
    step_x, step_y = next((delta for name, delta in offsets if heading == name), (0, 0))

    target = (x + step_x, y + step_y)
    if not in_bounds(target, width, height):
        return state
    return {**state, "agent_pos": target}

class SimpleEthics:
    """Action filter that vetoes moves into forbidden cells by replacing them with a NOOP."""

    def filter_actions(self, state: Dict[str, Any], actions: Dict[str, Any]) -> Dict[str, Any]:
        """Return ``actions`` unchanged unless the agent's move targets a forbidden cell.

        When the target cell is in ``state["forbidden"]`` a fresh
        ``{"agent": {"type": "NOOP"}}`` dict is returned instead.
        """
        requested = actions.get("agent", {})
        if requested.get("type") == "MOVE":
            col, row = state["agent_pos"]
            heading = requested.get("dir")
            offsets = (("UP", (0, -1)), ("DOWN", (0, 1)), ("LEFT", (-1, 0)), ("RIGHT", (1, 0)))
            # Unrecognised directions translate to a zero offset (the current cell).
            shift_x, shift_y = next((delta for name, delta in offsets if heading == name), (0, 0))
            if (col + shift_x, row + shift_y) in state["forbidden"]:
                return {"agent": {"type": "NOOP"}}
        return actions


# =========================
# Agent core with BFS pathfinding
# =========================

class ListMemoryForAgent:
    """Minimal agent-side memory: an ever-growing list of feedback dicts."""

    def __init__(self) -> None:
        # Feedback entries in the order they were stored.
        self.buffer: List[Dict[str, Any]] = list()

    def store(self, feedback: Dict[str, Any]) -> None:
        """Append ``feedback`` to the end of ``buffer``."""
        log = self.buffer
        log.append(feedback)

class PathfindingCore:
    """Cognitive core that steers the agent toward the goal using breadth-first search."""

    def __init__(self) -> None:
        # Reward from the most recent feedback passed to learn().
        self.last_reward: float = 0.0

    def encode(self, env_state: Dict[str, Any]) -> Dict[str, Any]:
        """Extract the fields the planner needs from the environment state.

        ``forbidden`` is copied into a fresh set and defaults to an empty set
        when the key is absent.
        """
        ax, ay = env_state["agent_pos"]
        gx, gy = env_state["goal_pos"]
        width, height = env_state["width"], env_state["height"]
        forbidden = set(env_state.get("forbidden", set()))
        return {
            "agent_pos": (ax, ay),
            "goal_pos": (gx, gy),
            "width": width,
            "height": height,
            "forbidden": forbidden,
        }

    def _bfs_next_dir(
        self,
        start: Tuple[int, int],
        goal: Tuple[int, int],
        width: int,
        height: int,
        forbidden: set[Tuple[int, int]],
    ) -> Optional[str]:
        """Return the first direction of a shortest path from ``start`` to ``goal``.

        Returns None when ``start`` already equals ``goal`` or when no path
        exists that stays in bounds and avoids ``forbidden`` cells.
        """
        if start == goal:
            return None

        moves = (((0, -1), "UP"), ((0, 1), "DOWN"), ((-1, 0), "LEFT"), ((1, 0), "RIGHT"))
        queue = deque([start])
        visited = {start}
        # Maps each discovered cell to the cell it was first reached from.
        parent: Dict[Tuple[int, int], Tuple[int, int]] = {}

        while queue:
            cur = queue.popleft()
            if cur == goal:
                break
            for (dx, dy), _ in moves:
                nxt = (cur[0] + dx, cur[1] + dy)
                if not in_bounds(nxt, width, height):
                    continue
                if nxt in forbidden or nxt in visited:
                    continue
                visited.add(nxt)
                parent[nxt] = cur
                queue.append(nxt)

        if goal not in parent:
            return None

        # Walk back from the goal until we hit the cell adjacent to start.
        node = goal
        while parent[node] != start:
            node = parent[node]

        first_step = (node[0] - start[0], node[1] - start[1])
        for delta, name in moves:
            if delta == first_step:
                return name
        return None

    def _greedy_dir(
        self,
        start: Tuple[int, int],
        goal: Tuple[int, int],
        width: int,
        height: int,
        forbidden: set[Tuple[int, int]],
    ) -> Optional[str]:
        """Pick the legal neighbouring move that minimises Manhattan distance to the goal.

        Ties keep the order RIGHT, DOWN, LEFT, UP. Returns None when every
        neighbour is out of bounds or forbidden.
        """
        deltas = {"RIGHT": (1, 0), "DOWN": (0, 1), "LEFT": (-1, 0), "UP": (0, -1)}

        def after(direction: str) -> Tuple[int, int]:
            dx, dy = deltas[direction]
            return (start[0] + dx, start[1] + dy)

        # sorted() is stable, so equal distances preserve the listed order.
        for direction in sorted(deltas, key=lambda d: manhattan(after(d), goal)):
            cell = after(direction)
            if in_bounds(cell, width, height) and cell not in forbidden:
                return direction
        return None

    def reason(self, percepts: Dict[str, Any], memory: ListMemoryForAgent, values: Dict[str, Any]) -> Dict[str, Any]:
        """Choose the next action from encoded percepts.

        Returns ``{"type": "MOVE", "dir": ...}`` or ``{"type": "NOOP"}``.
        ``memory`` and ``values`` are accepted but not consulted.
        """
        start = percepts["agent_pos"]
        goal = percepts["goal_pos"]
        width = percepts["width"]
        height = percepts["height"]
        forbidden = percepts["forbidden"]

        # _bfs_next_dir() returns None both for "no path" and for "already at
        # the goal". Without this guard the greedy fallback below would treat
        # the latter as the former and move the agent off the goal.
        if start == goal:
            return {"type": "NOOP"}

        direction = self._bfs_next_dir(start, goal, width, height, forbidden)
        if direction is None:
            # Fallback: greedy nudge that avoids forbidden and stays in bounds
            direction = self._greedy_dir(start, goal, width, height, forbidden)

        if direction is None:
            return {"type": "NOOP"}
        return {"type": "MOVE", "dir": direction}

    def learn(self, feedback: Dict[str, Any]) -> None:
        """Record the reward from ``feedback`` (0.0 when the key is missing)."""
        self.last_reward = feedback.get("reward", 0.0)


class SimulatedAgent:
    """Thin agent shell that delegates perception, decision and learning to its core."""

    def __init__(self, cognitive_core: PathfindingCore, memory: ListMemoryForAgent, values: Dict[str, Any]):
        self.core, self.memory, self.values = cognitive_core, memory, values
        # Latest encoded observation; stays None until observe() is called.
        self.percepts: Optional[Dict[str, Any]] = None

    def observe(self, env_state: Dict[str, Any]) -> None:
        """Encode ``env_state`` through the core and keep the result as current percepts."""
        encoded = self.core.encode(env_state)
        self.percepts = encoded

    def act(self) -> Dict[str, Any]:
        """Ask the core for an action based on the current percepts.

        Raises:
            RuntimeError: if observe() has not been called yet.
        """
        current = self.percepts
        if current is not None:
            return self.core.reason(current, self.memory, self.values)
        raise RuntimeError("act() called before observe(). Call observe(env_state) first.")

    def update(self, feedback: Dict[str, Any]) -> None:
        """Store ``feedback`` in memory, then let the core learn from it."""
        for consume in (self.memory.store, self.core.learn):
            consume(feedback)


class WorldSimulator:
    """Holds the world state and advances it one action set at a time."""

    def __init__(self, physics_rules: Dict[str, Any], ethical_laws: SimpleEthics, initial_state: Optional[Dict[str, Any]] = None):
        # Fall back to the default world only when no state was supplied.
        if initial_state is None:
            initial_state = init_world()
        self.state = initial_state
        self.rules = physics_rules
        self.ethics = ethical_laws

    def step(self, actions: Dict[str, Any]) -> Dict[str, Any]:
        """Filter ``actions`` through the ethics layer, apply physics, and return the new state."""
        vetted = self.ethics.filter_actions(self.state, actions)
        advanced = apply_physics(self.state, vetted, self.rules)
        self.state = advanced
        return advanced


# =========================
# Outcome labeling and predictor
# =========================

def outcome_label(prev_state: Dict[str, Any], new_state: Dict[str, Any]) -> str:
    """Classify a transition by how the agent's position changed relative to the goal.

    Returns one of ``"noop"`` (position unchanged), ``"reached"`` (now on the
    goal), ``"closer"``, ``"farther"`` or ``"same"`` (Manhattan distance to the
    goal decreased, increased or stayed equal). The goal is read from
    ``new_state``.
    """
    before = prev_state["agent_pos"]
    after = new_state["agent_pos"]
    target = new_state["goal_pos"]

    # The unchanged-position check takes precedence over the goal check.
    if after == before:
        return "noop"
    if after == target:
        return "reached"

    dist_before = manhattan(before, target)
    dist_after = manhattan(after, target)
    if dist_after == dist_before:
        return "same"
    return "closer" if dist_after < dist_before else "farther"

def input_label(prev_state: Dict[str, Any], action: Dict[str, Any], goal: Tuple[int, int]) -> str:
    """Encode position, intended direction and goal as ``pos=(x,y)|dir=D|goal=(x,y)``.

    The direction is ``"NONE"`` for any non-MOVE action and for a MOVE that
    carries no ``"dir"`` key.
    """
    pos = prev_state["agent_pos"]
    if action.get("type") == "MOVE":
        d = action.get("dir", "NONE")
    else:
        d = "NONE"
    return f"pos=({pos[0]},{pos[1]})|dir={d}|goal=({goal[0]},{goal[1]})"

class OutcomePredictor:
    """
    A simple world model that predicts outcomes ignoring ethics (assumes the move executes).
    It learns blocked transitions from contradictions and will predict 'noop' for those next time.
    Inputs that encode no move (direction "NONE", or a label that cannot be parsed) are
    predicted as 'noop', matching how outcome_label() reports an unchanged position.
    """

    # Grid offset for each recognised direction.
    _DELTAS = {"UP": (0, -1), "DOWN": (0, 1), "LEFT": (-1, 0), "RIGHT": (1, 0)}

    def __init__(self):
        # (position, direction) pairs that were observed to leave the agent in place.
        self.blocked: set[Tuple[Tuple[int,int], str]] = set()

    def _parse(self, x: str) -> Tuple[Tuple[int,int], str, Tuple[int,int]]:
        """Decode a ``pos=(x,y)|dir=D|goal=(x,y)`` label.

        Returns ``((0, 0), "NONE", (0, 0))`` when the label is malformed.
        """
        try:
            parts = x.split("|")
            pos_s = parts[0].split("=")[1].strip("()")
            dir_s = parts[1].split("=")[1]
            goal_s = parts[2].split("=")[1].strip("()")
            px, py = map(int, pos_s.split(","))
            gx, gy = map(int, goal_s.split(","))
            return (px, py), dir_s, (gx, gy)
        except (IndexError, ValueError):
            # IndexError: a field or '=' is missing. ValueError: a coordinate
            # is not an integer or a pair has the wrong number of components.
            return (0, 0), "NONE", (0, 0)

    def simulate(self, x: Any) -> str:
        """Predict the outcome label for an encoded input.

        Returns 'noop' for learned blocked transitions and for inputs without
        a recognised direction; otherwise 'reached', 'closer', 'farther' or
        'same' based on where the move would land relative to the goal.
        """
        pos, direction, goal = self._parse(str(x))
        if (pos, direction) in self.blocked:
            return "noop"
        delta = self._DELTAS.get(direction)
        if delta is None:
            # No move is encoded, so the agent stays put. update() never learns
            # "NONE" transitions, so predicting anything else here would be a
            # contradiction that can never be resolved.
            return "noop"
        new_pos = (pos[0] + delta[0], pos[1] + delta[1])
        if new_pos == goal:
            return "reached"
        pd = manhattan(pos, goal)
        nd = manhattan(new_pos, goal)
        if nd < pd:
            return "closer"
        if nd > pd:
            return "farther"
        return "same"

    def update(self, contradictory: List[Episode]) -> None:
        """Remember every attempted move whose recorded outcome was 'noop' as blocked.

        Prints the newly learned transitions, if any.
        """
        to_add: set[Tuple[Tuple[int,int], str]] = set()
        for ep in contradictory:
            # Coerce to str as simulate() does so _parse() always receives text.
            pos, direction, _ = self._parse(str(ep.input))
            if ep.outcome == "noop" and direction != "NONE":
                to_add.add((pos, direction))
        new = to_add - self.blocked
        self.blocked |= to_add
        if new:
            print(f"[WorldModel] Learned blocked transitions: {sorted(new)}")


class ReflectiveSelfModel:
    """Self model that only keeps a running tally of the contradictions it was shown."""

    def __init__(self):
        # Total number of contradictory episodes received across all adapt() calls.
        self.learned_count = 0

    def adapt(self, contradictory: List[Episode]) -> None:
        """Add the batch size to the tally and print a one-line summary."""
        self.learned_count = self.learned_count + len(contradictory)
        print(f"[SelfModel] Adapted on {len(contradictory)} contradictions (total={self.learned_count}).")


# =========================
# Similarity function
# =========================

def basic_similarity(a: str, b: str) -> float:
    """Exact-match similarity: 1.0 when ``a`` equals ``b``, otherwise 0.0."""
    if a == b:
        return 1.0
    return 0.0


# =========================
# Feedback utility
# =========================

def make_feedback(prev_state: Dict[str, Any], new_state: Dict[str, Any]) -> Dict[str, Any]:
    """Build the feedback dict for one transition.

    ``reward`` is the decrease in Manhattan distance to the goal (each state is
    measured against its own ``goal_pos``), plus 10.0 when the agent now
    stands on the goal. ``reached_goal`` tells whether it does.
    """
    dist_before = manhattan(prev_state["agent_pos"], prev_state["goal_pos"])
    dist_after = manhattan(new_state["agent_pos"], new_state["goal_pos"])
    arrived = new_state["agent_pos"] == new_state["goal_pos"]

    # +1 closer, -1 farther, 0 same/noop
    progress = dist_before - dist_after
    reward = progress + 10.0 if arrived else progress
    return {"reward": reward, "reached_goal": arrived}


# =========================
# End-to-end main routine
# =========================

def run_simulation_and_training():
    """Run the end-to-end demo: rollout, contradiction audit, revision, curriculum training.

    1. Roll the pathfinding agent out in a 5x5 grid for at most 20 steps,
       storing one Episode per step and stopping early once the goal is reached.
    2. Print the contradictions between a fresh OutcomePredictor and the
       recorded outcomes, grouped by identical (input, prediction, outcome, score).
    3. Revise the models once on those contradictions.
    4. Run three training steps with a linearly increasing threshold.

    All results are printed; nothing is returned.
    """
    # World and agent
    physics_rules = {"friction": 0.0}
    ethics = SimpleEthics()
    world = WorldSimulator(
        physics_rules,
        ethics,
        initial_state=init_world(
            width=5, height=5, agent_pos=(0, 0), goal_pos=(3, 3), forbidden=[(1, 1), (2, 2)]
        ),
    )

    agent = SimulatedAgent(PathfindingCore(), ListMemoryForAgent(), values={"risk": 0.0})
    epo_mem = EpisodicMemory()

    print("=== Rollout ===")
    max_steps = 20
    for t in range(max_steps):
        # Snapshot the state before stepping so labels can compare before/after.
        prev = dict(world.state)
        agent.observe(prev)
        action = agent.act()
        new_state = world.step({"agent": action})
        fb = make_feedback(prev, new_state)
        agent.update(fb)

        # The input label records the action the agent requested, even when
        # the ethics layer replaced it with a NOOP.
        lbl_in = input_label(prev, action, new_state["goal_pos"])
        lbl_out = outcome_label(prev, new_state)
        epo_mem.store(Episode(lbl_in, lbl_out))

        print(
            f"t={t:02d} pos={new_state['agent_pos']} action={action} outcome={lbl_out} "
            f"reward={fb['reward']} goal={new_state['goal_pos']} reached={fb['reached_goal']}"
        )
        if fb["reached_goal"]:
            print("Goal reached. Stopping rollout.")
            break

    # Reflective updater
    updater = ReflectiveUpdater(
        episodic_memory=epo_mem,
        world_model=OutcomePredictor(),
        self_model=ReflectiveSelfModel(),
        similarity_fn=basic_similarity,
        threshold=0.8,
    )

    print("\n=== Audited contradictions ===")
    contradictions = updater.detect_contradictions_detailed(limit=512, batch_size=128)
    # Deduplicate identical lines and count occurrences. The score is part of
    # the key so the printed value is the one actually computed rather than a
    # hard-coded 0.0.
    counts = Counter(
        (c.episode.input, c.prediction, c.episode.outcome, c.score) for c in contradictions
    )
    for (inp, pred, actual, score), cnt in counts.items():
        print(f"[Contradiction x{cnt}] input='{inp}' pred='{pred}' actual='{actual}' score={score}")

    print("\n=== Batched revise ===")
    n = updater.revise_model_batched(batch_size=128)
    print(f"[Revision] Contradictions processed: {n}")

    print("\n=== Curriculum training ===")
    sched = LinearThresholdScheduler(start=0.7, end=0.95, total_steps=3)
    for _ in range(3):
        report = updater.training_step(max_episodes=512, batch_size=128, scheduler=sched)
        print(
            f"[Training] step={report.step} processed={report.processed} "
            f"contradictory={report.contradictory} threshold={report.threshold:.2f} "
            f"avg_all={report.avg_score_all:.3f} avg_contra={report.avg_score_contradictions:.3f}"
        )

# Run the demo only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    run_simulation_and_training()