<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/conceptsynth_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# conceptsynth.py — pluggable concept synthesizer scaffold

from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, List, Protocol, Tuple, Optional
import hashlib
import json
import math
import random

# ---------------------------
# Interfaces (protocols)
# ---------------------------

class Encoder(Protocol):
    """Structural interface for objects that turn raw input into a latent record."""

    def encode(self, raw: Any) -> Dict[str, Any]:
        """
        Return a latent dictionary for ``raw``.

        The default helpers in this file (``detect_geometric_invariants`` and
        ``abstract_latent``) look up the "vec", "source" and "hash" keys.
        """

class Symbolizer(Protocol):
    """Structural interface for objects that name a concept record with a symbol."""

    def assign(self, concept: Dict[str, Any]) -> str:
        """Return the symbol string chosen for ``concept``."""

class Ontology(Protocol):
    """Structural interface for a store of symbols and the relations between them."""

    def integrate(self, symbol: str, concept: Dict[str, Any]) -> None:
        """Record ``concept`` under ``symbol``."""

    def neighbors(self, symbol: str, k: int = 5) -> List[str]:
        """Return symbols related to ``symbol``; ``k`` is the requested count."""

# ---------------------------
# Core synthesizer
# ---------------------------

@dataclass
class ConceptSynthesizer:
    """
    Pipeline that turns one raw experience into a symbol stored in an ontology.

    Stages run in a fixed order: encode, extract invariants, synthesize a
    concept record, assign a symbol, integrate into the ontology.
    """

    encoder: Encoder           # e.g., CLIP-like
    symbolizer: Symbolizer     # e.g., VQ / codebook
    ontology: Ontology         # e.g., probabilistic hypergraph
    trace: bool = True         # print one summary line per processed experience

    def process_experience(self, raw_data: Any) -> str:
        """Run every pipeline stage on ``raw_data`` and return the assigned symbol."""
        encoded = self.encoder.encode(raw_data)
        invariants = self.extract_invariants(encoded)
        concept = self.synthesize(encoded, invariants)

        symbol = self.symbolizer.assign(concept)
        self.ontology.integrate(symbol, concept)

        if not self.trace:
            return symbol
        print(f"[ConceptSynth] symbol={symbol} inv={invariants} meta={concept.get('meta')}")
        return symbol

    def extract_invariants(self, latent: Dict[str, Any]) -> Dict[str, Any]:
        """Delegate to ``detect_geometric_invariants``; override to customise."""
        found = detect_geometric_invariants(latent)
        return found

    def synthesize(self, latent: Dict[str, Any], invariants: Dict[str, Any]) -> Dict[str, Any]:
        """Delegate to ``abstract_latent``; override to customise."""
        record = abstract_latent(latent, invariants)
        return record

# ---------------------------
# Default invariant and abstraction logic
# ---------------------------

def detect_geometric_invariants(latent: Dict[str, Any]) -> Dict[str, Any]:
    """
    Summarize the vector stored under latent['vec'] (a sequence of numbers).

    Returns a dict with:
      - "norm": Euclidean length, rounded to 4 decimals
      - "axis": index of the component with the largest absolute value
      - "orientation": "positive" if that component is >= 0, else "negative"
    A missing or empty vector yields norm 0.0 with axis and orientation None.
    """
    components = latent.get("vec", [])
    if components:
        magnitude = math.sqrt(sum([c * c for c in components]))
        # max() keeps the first maximal element, so ties go to the lowest index.
        dominant, _ = max(enumerate(components), key=lambda pair: abs(pair[1]))
        if components[dominant] >= 0:
            sign = "positive"
        else:
            sign = "negative"
        return {"norm": round(magnitude, 4), "axis": dominant, "orientation": sign}
    return {"norm": 0.0, "axis": None, "orientation": None}

def abstract_latent(latent: Dict[str, Any], invariants: Dict[str, Any]) -> Dict[str, Any]:
    """
    Combine latent stats + invariants into a minimal concept record.

    The record always has:
      - "invariants": the ``invariants`` argument, stored as-is
      - "meta": latent "source", latent "hash", and "dim" (vector length)
    When latent['vec'] is non-empty it also has:
      - "signature": indices ("top_idx") and values ("top_vals") of up to
        four components with the largest absolute value, largest first

    A missing, empty, or None latent['vec'] is treated as an empty vector.
    """
    # `or []` also covers an explicit "vec": None, which would otherwise make
    # len() raise even though detect_geometric_invariants accepts that input.
    vec = latent.get("vec") or []
    concept: Dict[str, Any] = {
        "invariants": invariants,
        "meta": {
            "source": latent.get("source"),
            "hash": latent.get("hash"),
            "dim": len(vec),
        },
    }
    # Optional: attach sparse signature (top-k indices)
    if vec:
        k = min(4, len(vec))
        # Stable sort: components with equal magnitude keep their index order.
        tops = sorted(range(len(vec)), key=lambda i: abs(vec[i]), reverse=True)[:k]
        concept["signature"] = {"top_idx": tops, "top_vals": [vec[i] for i in tops]}
    return concept

# ---------------------------
# Dummy implementations
# ---------------------------

class ToyEncoder:
    """
    Maps raw string or dict to a fixed-length numeric vector with a stable hash,
    mimicking a contrastive encoder interface.

    The vector is derived only from a SHA-256 digest of the input, so equal
    inputs always produce equal outputs.
    """
    def __init__(self, dim: int = 8, seed: int = 7) -> None:
        """
        dim:  number of components in every encoded vector.
        seed: passed to ``random.seed``; ``encode`` itself does not use it.
        """
        self.dim = dim
        # NOTE(review): this reseeds the process-wide RNG. It is kept so code
        # that relied on that side effect behaves as before; consider a
        # private random.Random(seed) if randomness is ever needed here.
        random.seed(seed)

    def encode(self, raw: Any) -> Dict[str, Any]:
        """
        Encode ``raw`` into {"vec", "source", "hash"}.

        dicts and lists are serialised as key-sorted JSON, anything else via
        ``str``. "vec" has ``self.dim`` floats in [-1, 1), "source" is "toy",
        and "hash" is the first 16 hex characters of the SHA-256 digest.
        """
        s = json.dumps(raw, sort_keys=True) if isinstance(raw, (dict, list)) else str(raw)
        hasher = hashlib.sha256(s.encode("utf-8"))
        stream = self._byte_stream(hasher.digest(), 4 * self.dim)
        # Deterministic pseudo-vector from hash bytes: each 4-byte
        # little-endian word is reduced mod 10_000 and mapped to [-1, 1).
        vec = [
            (int.from_bytes(stream[4 * i:4 * i + 4], "little", signed=False) % 10_000) / 5000.0 - 1.0
            for i in range(self.dim)
        ]
        return {"vec": vec, "source": "toy", "hash": hasher.hexdigest()[:16]}

    @staticmethod
    def _byte_stream(digest: bytes, needed: int) -> bytes:
        """
        Return at least ``needed`` deterministic bytes that start with ``digest``.

        One digest holds only 32 bytes (8 components). Without an extension,
        every component past the 8th would read an empty slice and collapse to
        -1.0. Extra blocks are SHA-256(digest + counter), so the first 32 bytes,
        and therefore the first 8 components, are unchanged.
        """
        blocks = [digest]
        produced = len(digest)
        counter = 1
        while produced < needed:
            block = hashlib.sha256(digest + counter.to_bytes(4, "little")).digest()
            blocks.append(block)
            produced += len(block)
            counter += 1
        return b"".join(blocks)

class VQSymbolizer:
    """
    Minimal codebook: the symbol is built from the dominant axis, its sign,
    and a coarse bin of the norm.
    """
    def __init__(self, bins: int = 4) -> None:
        # Bin ids are capped at bins - 1.
        self.bins = bins

    def assign(self, concept: Dict[str, Any]) -> str:
        """
        Return "CAX<axis>:<orientation>:B<bin>" for ``concept``.

        Values are read from concept["invariants"]; the bin is the norm
        clamped to the range 0..bins-1 and truncated to an int.
        """
        invariants = concept.get("invariants", {})
        axis = invariants.get("axis")
        orientation = invariants.get("orientation")
        magnitude = float(invariants.get("norm", 0.0))
        floored = max(0, magnitude)
        capped = min(self.bins - 1, floored)
        bin_id = int(capped)  # crude bin on norm
        return f"CAX{axis}:{orientation}:B{bin_id}"

class HypergraphOntology:
    """
    Stores one concept per symbol plus weighted, symmetric links between symbols.

    A link's weight is the Jaccard overlap of the two concepts' signature
    "top_idx" sets. Norm proximity is not taken into account.
    """
    def __init__(self) -> None:
        self.nodes: Dict[str, Dict[str, Any]] = {}
        self.edges: Dict[str, Dict[str, float]] = {}  # symbol -> neighbor -> weight

    def integrate(self, symbol: str, concept: Dict[str, Any]) -> None:
        """
        Store ``concept`` under ``symbol`` and link it to every similar node.

        Re-integrating an existing symbol replaces its stored concept. Existing
        link weights are only ever raised (max of old and new), never lowered
        or removed.
        """
        self.nodes[symbol] = concept
        own_links = self.edges.setdefault(symbol, {})
        # Connect to neighbors by simple heuristic
        for other, other_concept in self.nodes.items():
            if other == symbol:
                continue
            w = self._similarity(concept, other_concept)
            if w > 0:
                own_links[other] = max(own_links.get(other, 0.0), w)
                # Mirror the link so the relation stays symmetric.
                back_links = self.edges.setdefault(other, {})
                back_links[symbol] = max(back_links.get(symbol, 0.0), w)

    def _similarity(self, a: Dict[str, Any], b: Dict[str, Any]) -> float:
        """Jaccard overlap of the signature "top_idx" sets; 0.0 if either is empty."""
        sig_a = set(a.get("signature", {}).get("top_idx", []))
        sig_b = set(b.get("signature", {}).get("top_idx", []))
        if not sig_a or not sig_b:
            return 0.0
        return len(sig_a & sig_b) / len(sig_a | sig_b)

    def neighbors(self, symbol: str, k: int = 5) -> List[str]:
        """
        Return up to ``k`` linked symbols, strongest link first.

        Unknown symbols and non-positive ``k`` yield an empty list.
        """
        # A negative k would slice from the end ([:k]) and return almost
        # every neighbor, so non-positive requests are answered with nothing.
        if k <= 0:
            return []
        links = self.edges.get(symbol, {})
        ranked = sorted(links.items(), key=lambda kv: kv[1], reverse=True)
        return [name for name, _ in ranked[:k]]

# ---------------------------
# Demo
# ---------------------------

def demo():
    """Feed four sample captions through the toy pipeline, printing each symbol's neighbors."""
    synth = ConceptSynthesizer(
        encoder=ToyEncoder(dim=12),
        symbolizer=VQSymbolizer(bins=5),
        ontology=HypergraphOntology(),
        trace=True,
    )

    image_ids = [1, 2, 3, 1]
    captions = [
        "red circle top-left",
        "blue square bottom-right",
        "green triangle center",
        "red circle top-left (variant)",
    ]
    for image_id, caption in zip(image_ids, captions):
        sample = {"image_id": image_id, "caption": caption}
        symbol = synth.process_experience(sample)
        print("-> neighbors:", synth.ontology.neighbors(symbol, k=3))

# Run the demo only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    demo()