<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/recursive_generalization_engine_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# recursive_generalization_engine.py
# Ready-to-run: a safer, deduplicated, CLI-friendly recursive generalization engine
# with a minimal plug-in stack (abstraction → theory → simulation → validation).

from __future__ import annotations

import argparse
import math
import os
import random
from dataclasses import dataclass, field
from typing import Any, Dict, Iterable, List, Optional, Protocol, Tuple


# =========================
# Protocols and data models
# =========================

class AbstractionEngine(Protocol):
    """Structural interface for objects that turn raw input into candidates."""

    def extract(self, input_data: Any) -> List[Dict[str, Any]]:
        """Return a list of candidate dicts derived from ``input_data``."""


class TheoryConstructor(Protocol):
    """Structural interface for objects that build a theory from one candidate."""

    def build(self, candidate: Dict[str, Any]) -> "Theory":
        """Return the ``Theory`` constructed from a single ``candidate`` dict."""


class Simulator(Protocol):
    """Structural interface for objects that simulate a theory."""

    def test(self, theory: "Theory") -> "SimOutput":
        """Return the ``SimOutput`` produced by simulating ``theory``."""


class Validator(Protocol):
    """Structural interface for objects that score a simulated theory."""

    # Score cut-off exposed as an attribute so callers can compare against it.
    threshold: float

    def score(self, theory: "Theory", sim_output: "SimOutput") -> float:
        """Return a numeric score for ``theory`` given its ``sim_output``."""


@dataclass(frozen=True)
class Theory:
    """Immutable, hashable theory: a name plus an ordered tuple of parameters.

    ``params`` holds ``(key, value)`` pairs. ``from_dict`` stores them sorted
    by key so that two theories built from equal dicts compare and hash equal.
    """

    name: str
    params: Tuple[Tuple[str, Any], ...]  # (key, value) pairs; sorted by key when built via from_dict

    def key(self) -> Tuple[str, Tuple[Tuple[str, Any], ...]]:
        """Return the ``(name, params)`` pair identifying this theory."""
        return self.name, self.params

    @staticmethod
    def from_dict(name: str, params: Dict[str, Any]) -> "Theory":
        """Build a ``Theory`` from a plain dict, ordering parameters by key."""
        # Dict keys are unique, so walking the sorted keys gives a
        # deterministic ordering regardless of insertion order.
        ordered = tuple((k, params[k]) for k in sorted(params))
        return Theory(name=name, params=ordered)

    def describe(self) -> str:
        """Return a readable ``name(k1=v1, k2=v2)`` rendering of the theory."""
        rendered = [f"{k}={v}" for k, v in self.params]
        joined = ", ".join(rendered)
        return f"{self.name}({joined})"


@dataclass
class SimOutput:
    """Result of simulating one theory.

    Carries the number of trials run, the measured accuracy, and a free-form
    ``details`` dict for any extra artifacts the simulator wants to expose.
    """

    trials: int  # number of simulated trials
    accuracy: float  # measured accuracy of the theory over the simulation
    # Each instance gets its own dict (default_factory), never a shared one.
    details: Dict[str, Any] = field(default_factory=dict)


# =========================
# The recursive engine
# =========================

class RecursiveGeneralizationEngine:
    """Recursively extract candidates, build theories, simulate and score them.

    Each level runs the pipeline abstraction -> theory -> simulation ->
    validation on its input. Theories whose score reaches the validator's
    threshold are stored in ``knowledge_base`` (best first, capped at
    ``top_k_knowledge``). Every evaluated theory then seeds a deeper level
    using the simulator's ``details["synthetic_data"]`` (or ``[theory]`` when
    that key is absent) until ``max_depth`` or the budget is reached.
    """

    def __init__(
        self,
        abstraction_engine: AbstractionEngine,
        theory_constructor: TheoryConstructor,
        simulator: Simulator,
        validator: Validator,
        *,
        max_candidates_per_level: int = 16,
        top_k_knowledge: int = 32,
        verbose: bool = True,
    ):
        """Store the plug-in components and limits.

        Args:
            abstraction_engine: Produces candidate dicts from input data.
            theory_constructor: Builds a theory from one candidate.
            simulator: Simulates a theory and returns its output.
            validator: Scores a simulated theory and exposes ``threshold``.
            max_candidates_per_level: Only the first N candidates returned by
                the abstraction engine are considered at each level.
            top_k_knowledge: Maximum number of entries kept in the knowledge base.
            verbose: When true, progress lines are printed.
        """
        self.abstraction_engine = abstraction_engine
        self.theory_constructor = theory_constructor
        self.simulator = simulator
        self.validator = validator
        self.knowledge_base: List[Tuple[Theory, float, SimOutput]] = []
        # Keys of every theory already built, so duplicates are evaluated once.
        self._seen: set[Tuple[str, Tuple[Tuple[str, Any], ...]]] = set()
        self.max_candidates_per_level = max_candidates_per_level
        self.top_k_knowledge = top_k_knowledge
        self.verbose = verbose

    def _log(self, depth: int, msg: str):
        """Print ``msg`` indented by two spaces per ``depth`` when verbose."""
        if self.verbose:
            indent = "  " * depth
            print(f"{indent}{msg}")

    def _consider(self, theory: Theory, score: float, sim_output: SimOutput):
        """Insert an accepted theory and trim the knowledge base to the top K."""
        self.knowledge_base.append((theory, score, sim_output))
        # Keep best top_k sorted by score descending, then accuracy, then trials
        self.knowledge_base.sort(key=lambda x: (x[1], x[2].accuracy, x[2].trials), reverse=True)
        if len(self.knowledge_base) > self.top_k_knowledge:
            self.knowledge_base = self.knowledge_base[: self.top_k_knowledge]

    def run(self, input_data: Any, depth: int = 0, max_depth: int = 3, budget: Optional[int] = None):
        """Run recursive generalization and return the knowledge base.

        Args:
            input_data: Data handed to the abstraction engine at this level.
            depth: Current recursion depth (used for log indentation and the
                depth limit).
            max_depth: Levels at ``depth < max_depth`` recurse one level deeper.
            budget: Optional global cap on the total number of evaluated
                theories across the whole recursion; ``None`` means unlimited.

        Returns:
            ``knowledge_base``: a list of ``(theory, score, sim_output)``
            tuples, best first.
        """
        self._expand(input_data, depth, max_depth, budget)
        return self.knowledge_base

    def _expand(self, input_data: Any, depth: int, max_depth: int, budget: Optional[int]) -> Optional[int]:
        """Process one level and return the budget left afterwards.

        Returning the remaining budget lets the caller account for everything
        spent in the subtree, which is what makes the cap global rather than
        per-branch. ``None`` is passed through unchanged (unlimited).
        """
        if budget is not None and budget <= 0:
            self._log(depth, "[HALT] Budget exhausted.")
            return budget

        # STEP 1: Identify patterns/abstractions (with candidate cap)
        candidates = self.abstraction_engine.extract(input_data)[: self.max_candidates_per_level]
        self._log(depth, f"[EXTRACT] {len(candidates)} candidate(s)")

        for c in candidates:
            if budget is not None and budget <= 0:
                self._log(depth, "[HALT] Budget exhausted mid-level.")
                break

            # STEP 2: Construct candidate theory
            try:
                theory = self.theory_constructor.build(c)
            except Exception as e:
                self._log(depth, f"[SKIP] Theory build failed: {e}")
                continue

            if theory.key() in self._seen:
                self._log(depth, f"[SEEN] {theory.describe()}")
                continue
            # Marked as seen before simulating, so a theory whose simulation
            # or scoring fails is not retried later.
            self._seen.add(theory.key())
            self._log(depth, f"[THEORY] {theory.describe()}")

            # STEP 3: Simulate consequences
            try:
                sim_output = self.simulator.test(theory)
            except Exception as e:
                self._log(depth, f"[SKIP] Simulation failed: {e}")
                continue

            # STEP 4: Evaluate generalization
            try:
                score = self.validator.score(theory, sim_output)
            except Exception as e:
                self._log(depth, f"[SKIP] Scoring failed: {e}")
                continue

            # STEP 5: If strong, store it
            passed = score >= self.validator.threshold
            verdict = "PASS" if passed else "FAIL"
            self._log(depth, f"[EVAL:{verdict}] score={score:.3f} acc={sim_output.accuracy:.3f} trials={sim_output.trials}")
            if passed:
                self._consider(theory, score, sim_output)

            # Only fully evaluated theories are charged against the budget;
            # skipped and already-seen candidates are free.
            if budget is not None:
                budget -= 1

            # STEP 6: Recursive generalization (feed simulation artifacts forward)
            if depth < max_depth:
                next_input = sim_output.details.get("synthetic_data", [theory])
                # Take back whatever the subtree left over so siblings share
                # the same global budget.
                budget = self._expand(next_input, depth + 1, max_depth, budget)

        return budget


# =========================
# Minimal plug-in stack (toy domain)
# =========================

# Domain: sequences of floats. We extract trend candidates, build a theory that predicts
# the sign of change, simulate AR(1)-like sequences with drift, and score by direction-accuracy.

def _sign(x: float) -> int:
    return 1 if x > 0 else (-1 if x < 0 else 0)

def _trend_of(seq: List[float]) -> int:
    if len(seq) < 2:
        return 0
    diffs = [b - a for a, b in zip(seq[:-1], seq[1:])]
    avg = sum(diffs) / len(diffs)
    return _sign(avg)

class SimpleAbstraction:
    """Propose direction candidates from one or more numeric sequences."""

    def __init__(self, propose_inverse: bool = True):
        """Store whether the opposite-direction candidate should also be proposed."""
        self.propose_inverse = propose_inverse

    def extract(self, input_data: Any) -> List[Dict[str, Any]]:
        """Return candidate dicts with ``"trend"`` and ``"momentum"`` keys.

        Accepted shapes for ``input_data``:
        - a dict containing ``"synthetic_data"`` (its value is used as the
          list of sequences),
        - a non-empty list whose first element is a list (list of sequences),
        - a list made only of ints/floats (a single sequence).

        Any other shape yields a single neutral candidate. Otherwise the
        result is the net-trend candidate, optionally its inverse, and a
        neutral candidate, in that order.
        """
        # Accept a list of sequences or a single sequence; normalize to list of sequences
        if isinstance(input_data, dict) and "synthetic_data" in input_data:
            sequences = input_data["synthetic_data"]
        elif isinstance(input_data, list) and input_data and isinstance(input_data[0], list):
            sequences = input_data  # list of sequences
        elif isinstance(input_data, list) and all(isinstance(x, (int, float)) for x in input_data):
            sequences = [input_data]
        else:
            # Unknown shape; produce a neutral candidate
            return [{"trend": 0, "momentum": 0.0}]

        # Aggregate trend across sequences: each sequence votes -1, 0 or +1.
        net_trend = _sign(sum(_trend_of(seq) for seq in sequences))
        candidates = [{"trend": net_trend, "momentum": 0.1}]
        # With a net trend of 0 the "inverse" (-0 == 0) would be an exact
        # duplicate of the primary candidate, so it is only added when it
        # actually differs.
        if self.propose_inverse and net_trend != 0:
            candidates.append({"trend": -net_trend, "momentum": 0.1})
        # Also propose a neutral candidate
        candidates.append({"trend": 0, "momentum": 0.0})
        return candidates

class SimpleTheoryBuilder:
    """Turn a candidate dict into a ``DirectionRule`` theory."""

    def build(self, candidate: Dict[str, Any]) -> Theory:
        """Return a ``DirectionRule`` theory from ``candidate``.

        ``"trend"`` is coerced to ``int`` (default 0) and ``"momentum"`` to
        ``float`` (default 0.1).
        """
        raw_trend = candidate.get("trend", 0)
        raw_momentum = candidate.get("momentum", 0.1)
        params = {"trend": int(raw_trend), "momentum": float(raw_momentum)}
        return Theory.from_dict("DirectionRule", params)

class DriftSimulator:
    """Simulate random walks with constant drift and Gaussian noise.

    For a theory with parameters ``trend`` and ``momentum`` the per-step drift
    is ``base_mu * trend + momentum * trend``. The theory's prediction is the
    sign of that drift; accuracy is the fraction of steps whose actual change
    has the same sign.
    """

    def __init__(self, *, trials: int = 100, length: int = 50, sigma: float = 0.5, base_mu: float = 0.15, seed: int = 7):
        """Store simulation settings.

        Args:
            trials: Number of independent sequences simulated per theory.
            length: Number of steps in each sequence.
            sigma: Standard deviation of the Gaussian step noise.
            base_mu: Base drift magnitude, multiplied by the theory's trend.
            seed: Base seed combined with the theory key to seed the RNG.
        """
        self.trials = trials
        self.length = length
        self.sigma = sigma
        self.base_mu = base_mu
        self.seed = seed

    def test(self, theory: Theory) -> SimOutput:
        """Simulate ``theory`` and return its direction accuracy.

        Returns:
            A ``SimOutput`` whose ``details`` hold the generated sequences
            under ``"synthetic_data"`` plus the ``"mu"`` and ``"sigma"`` used.
            Accuracy is 0.0 when no steps were simulated.
        """
        # Extract params
        params = dict(theory.params)
        trend = int(params.get("trend", 0))
        momentum = float(params.get("momentum", 0.0))

        # Drift and prediction do not change between trials. Computing them
        # up front also keeps `mu` defined for the result when trials == 0.
        mu = self.base_mu * trend + momentum * trend
        pred = _sign(mu)  # theory predicts sign of drift

        # Seed from a string rather than hash(): hashes of str-containing
        # tuples are randomized per interpreter process, which would make the
        # simulation differ between runs. String seeds are converted to an
        # integer deterministically by the random module.
        rng = random.Random(f"{self.seed}:{theory.key()!r}")

        correct = 0
        total = 0
        all_seqs: List[List[float]] = []

        for _ in range(self.trials):
            x = 0.0
            seq = [x]
            for _t in range(self.length):
                noise = rng.gauss(0.0, self.sigma)
                x_next = x + mu + noise
                if pred == _sign(x_next - x):
                    correct += 1
                total += 1
                x = x_next
                seq.append(x)
            all_seqs.append(seq)

        accuracy = (correct / total) if total > 0 else 0.0
        return SimOutput(
            trials=self.trials,
            accuracy=accuracy,
            details={"synthetic_data": all_seqs, "mu": mu, "sigma": self.sigma}
        )

class AccuracyValidator:
    """Validator whose score is simply the simulated accuracy."""

    def __init__(self, threshold: float = 0.60):
        """Store the ``threshold`` attribute (default 0.60)."""
        self.threshold = threshold

    def score(self, theory: Theory, sim_output: SimOutput) -> float:
        """Return ``sim_output.accuracy`` unchanged; ``theory`` is not consulted."""
        # No parsimony or penalty terms are applied: accuracy is the score.
        accuracy = sim_output.accuracy
        return accuracy


# =========================
# CLI and main
# =========================

def build_parser() -> argparse.ArgumentParser:
    """Create the command-line parser for the toy-domain engine."""

    def _as_bool(raw: Any) -> bool:
        # Anything outside this set (case-insensitive) is treated as False.
        return str(raw).lower() in {"1", "true", "yes", "y"}

    parser = argparse.ArgumentParser(description="Recursive Generalization Engine (toy domain)")

    # (flag, type, default, help) for every option that carries help text.
    documented_options = (
        ("--max_depth", int, 3, "Maximum recursion depth"),
        ("--budget", int, 64, "Global expansion budget (nodes)"),
        ("--max_candidates_per_level", int, 8, "Candidate cap per level"),
        ("--top_k_knowledge", int, 16, "Top-K retained in knowledge base"),
        ("--threshold", float, 0.60, "Minimum score to accept a theory"),
        ("--trials", int, 80, "Simulator trials per theory"),
        ("--length", int, 40, "Length of each simulated sequence"),
        ("--sigma", float, 0.5, "Noise std-dev for simulator"),
        ("--base_mu", float, 0.15, "Base drift magnitude"),
    )
    for flag, value_type, default_value, help_text in documented_options:
        parser.add_argument(flag, type=value_type, default=default_value, help=help_text)

    parser.add_argument("--seed", type=int, default=7)
    parser.add_argument("--verbose", type=_as_bool, default=True)
    return parser

def make_initial_data(seed: int = 7, sequences: int = 4, length: int = 20) -> List[List[float]]:
    """Generate seeded random-walk sequences to bootstrap the engine.

    Each sequence starts at 0.0, draws one drift uniformly from [-0.2, 0.2],
    and then takes ``length`` steps of ``drift + N(0, 0.3)`` noise, so every
    sequence has ``length + 1`` points.
    """
    rng = random.Random(seed)

    def _walk() -> List[float]:
        position = 0.0
        points = [position]
        # The drift is drawn before any noise so the RNG stream is consumed
        # in a fixed order: one uniform, then `length` gaussians.
        drift = rng.uniform(-0.2, 0.2)
        for _ in range(length):
            position = position + (drift + rng.gauss(0.0, 0.3))
            points.append(position)
        return points

    return [_walk() for _ in range(sequences)]

def main(argv: Optional[List[str]] = None):
    """Parse options, run the engine on generated data, and print a report.

    Args:
        argv: Argument strings to parse instead of the process command line.
            ``None`` (the default) keeps the usual behavior of reading
            ``sys.argv[1:]``; passing a list (e.g. ``[]``) allows calling
            this function from a notebook or from other code.
    """
    args = build_parser().parse_args(argv)
    random.seed(args.seed)
    # NOTE: hash randomization is fixed when the interpreter starts, so this
    # assignment cannot change hashing in the current process; it is only
    # visible to child processes that inherit the environment.
    os.environ.setdefault("PYTHONHASHSEED", "0")

    abstraction = SimpleAbstraction(propose_inverse=True)
    theory_builder = SimpleTheoryBuilder()
    simulator = DriftSimulator(
        trials=args.trials, length=args.length, sigma=args.sigma, base_mu=args.base_mu, seed=args.seed
    )
    validator = AccuracyValidator(threshold=args.threshold)

    engine = RecursiveGeneralizationEngine(
        abstraction, theory_builder, simulator, validator,
        max_candidates_per_level=args.max_candidates_per_level,
        top_k_knowledge=args.top_k_knowledge,
        verbose=args.verbose
    )

    initial_data = make_initial_data(seed=args.seed, sequences=4, length=20)

    print("[START] Running recursive generalization")
    kb = engine.run(initial_data, depth=0, max_depth=args.max_depth, budget=args.budget)

    print("\n=== KNOWLEDGE BASE (Top theories) ===")
    if not kb:
        print("No accepted theories (try lowering --threshold or increasing --budget).")
    else:
        # Entries are numbered from 1 in the order the engine returned them.
        for i, (theory, score, sim) in enumerate(kb, 1):
            verdict = "PASS" if score >= validator.threshold else "FAIL"
            print(f"{i:02d}. {theory.describe()} | score={score:.3f} | acc={sim.accuracy:.3f} | trials={sim.trials} [{verdict}]")

    print("\n=== SUMMARY ===")
    print(f"Depth limit: {args.max_depth} | Budget: {args.budget} | Threshold: {args.threshold:.2f}")
    print(f"Candidates/level cap: {args.max_candidates_per_level} | KB top-K: {args.top_k_knowledge}")
    print("[DONE]")

# Script entry point: run the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()