<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/spiral_question_pipeline_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# spiral_question_pipeline.py
# End-to-end generator → semantic scorer → artifact bundler with optional AES-GCM encryption.

import argparse
import csv
import hashlib
import io
import json
import os
import random
import secrets
import string
import sys
import time
import zipfile
from dataclasses import dataclass, asdict
from pathlib import Path
from typing import List, Tuple, Dict, Any, Optional

import yaml  # pip install pyyaml
from sentence_transformers import SentenceTransformer, util  # pip install sentence-transformers

# Encryption deps
from cryptography.hazmat.primitives.kdf.scrypt import Scrypt  # pip install cryptography
from cryptography.hazmat.primitives.ciphers.aead import AESGCM


# ------------------------------
# Helpers: RNG, hashing, time
# ------------------------------

def now_iso() -> str:
    """Return the current UTC time as ``YYYY-MM-DDTHH:MM:SSZ`` (second precision)."""
    utc_now = time.gmtime()
    return time.strftime("%Y-%m-%dT%H:%M:%SZ", utc_now)

def sha256_bytes(b: bytes) -> str:
    """Return the lowercase hexadecimal SHA-256 digest of ``b``."""
    digest = hashlib.sha256()
    digest.update(b)
    return digest.hexdigest()

def sha256_file(path: Path) -> str:
    """Return the hexadecimal SHA-256 digest of the file at ``path``.

    The file is read in 8192-byte chunks, so it is never loaded into memory
    as a whole.
    """
    digest = hashlib.sha256()
    with path.open('rb') as stream:
        while True:
            block = stream.read(8192)
            if not block:
                # An empty read marks end of file.
                break
            digest.update(block)
    return digest.hexdigest()

def seed_rng(seed: Optional[int]) -> random.Random:
    """Return a dedicated ``random.Random`` instance.

    When ``seed`` is ``None`` a seed below 2**31 is drawn from ``secrets``;
    otherwise the given seed is used as-is (including 0).
    """
    effective_seed = secrets.randbelow(2**31) if seed is None else seed
    return random.Random(effective_seed)

def slugify(s: str, maxlen: int = 60) -> str:
    """Turn ``s`` into a slug of ASCII letters, digits, ``-`` and ``_``.

    Surrounding whitespace is stripped, every other character becomes ``-``,
    runs of ``-`` (and leading/trailing ones) are collapsed, and the result is
    cut to at most ``maxlen`` characters.
    """
    permitted = set(string.ascii_letters + string.digits + "-_")
    mapped = "".join(ch if ch in permitted else "-" for ch in s.strip())
    # Dropping empty segments collapses repeated dashes and trims edge dashes.
    segments = [segment for segment in mapped.split("-") if segment]
    return "-".join(segments)[:maxlen]


# ------------------------------
# Ideals: load/save
# ------------------------------

# Built-in reference questions. load_ideals() returns a copy of this list when
# no ideals file is given, and main() falls back to it when the file yields no
# entries.
DEFAULT_IDEALS = [
    "How could artificial intelligence uncover hidden symmetries in the laws of nature?",
    "In what ways might AGI reshape our understanding of time and causality?",
    "Can the emergence of intelligence be considered a phase transition in the fabric of spacetime?",
]

def load_ideals(path: Optional[Path]) -> List[str]:
    """Load the ideal questions, one per non-blank line.

    Args:
        path: Ideals file, or ``None`` to use the built-in defaults.

    Returns:
        A copy of ``DEFAULT_IDEALS`` when ``path`` is ``None``, an empty list
        when the file does not exist, otherwise the stripped non-blank lines
        of the file in order.
    """
    if path is None:
        return list(DEFAULT_IDEALS)
    if not path.exists():
        return []
    ideals = []
    with path.open('r', encoding='utf-8') as f:
        for raw in f:
            text = raw.strip()
            if text:
                ideals.append(text)
    return ideals

def append_ideals(path: Path, new_lines: List[str]) -> None:
    """Append ``new_lines`` to the ideals file, one entry per line.

    Parent directories are created as needed and the file is created if it
    does not exist. Trailing whitespace of each entry is removed before it is
    written.

    Args:
        path: Ideals file to append to.
        new_lines: Entries to append, in order.
    """
    path.parent.mkdir(parents=True, exist_ok=True)

    # If the existing file does not end with a line break, the first appended
    # entry would be glued onto the last existing line; detect that case.
    needs_separator = False
    if new_lines and path.exists() and path.stat().st_size > 0:
        with path.open('rb') as existing:
            existing.seek(-1, os.SEEK_END)
            needs_separator = existing.read(1) not in (b"\n", b"\r")

    with path.open('a', encoding='utf-8') as f:
        if needs_separator:
            f.write("\n")
        for line in new_lines:
            f.write(line.rstrip() + "\n")


# ------------------------------
# Question generation (template-based)
# ------------------------------

# Question templates. generate_questions() fills them with str.format using the
# keys `topic`, `context` and `context2`; templates that do not mention
# `context2` simply ignore it.
TEMPLATES = [
    "How might {topic} reveal new structures in {context}?",
    "In what ways could {topic} transform our understanding of {context}?",
    "Could {topic} act as a bridge between {context} and {context2}?",
    "What would it mean if {topic} obeys a conservation law in {context}?",
    "Is there a phase transition where {topic} emerges from {context}?",
    "How could {topic} expose hidden symmetries within {context}?",
    "What constraints would {topic} impose on {context} at scale?",
    "Could {topic} be modeled as information flow across {context}?",
    "Does {topic} alter causal structure in {context}?",
    "What empirical signatures would {topic} leave in {context}?",
    "Can {topic} regularize paradoxes in {context} without breaking {context2}?",
    "Is {topic} fundamentally discrete or continuous within {context}?",
    "Could {topic} be reframed as a gauge over {context}?",
    "What happens to {topic} near critical points of {context}?",
    "Can {topic} be understood as a symmetry breaking in {context}?"
]

# Pool of context phrases that generate_questions() draws from for the
# `context` and `context2` template slots.
CONTEXTS = [
    "spacetime", "thermodynamics", "quantum fields", "cosmology",
    "information theory", "causality", "entropy", "measurement",
    "complex systems", "emergence", "symmetry", "computation",
    "observation", "gravity", "topology", "phase transitions"
]

def parse_topics(topics_str: Optional[str]) -> List[str]:
    """Split a comma-separated topic string into a list of topics.

    Each piece is stripped and empty pieces are dropped. A ``None`` or empty
    ``topics_str`` yields the built-in default topics.
    """
    if topics_str:
        pieces = (piece.strip() for piece in topics_str.split(","))
        return [piece for piece in pieces if piece]
    return ["AGI", "artificial intelligence", "consciousness", "alignment", "symbolic systems"]

def generate_questions(topics: List[str], num: int, rng: random.Random) -> List[str]:
    """Generate candidate questions by filling random templates.

    For each of ``num`` rounds a topic, a context, a second context different
    from the first, and a template are drawn from ``rng`` (in that order) and
    combined. Duplicates are removed afterwards, so the result may contain
    fewer than ``num`` questions.

    Args:
        topics: Topics to choose from.
        num: Number of generation rounds.
        rng: Random source; a seeded instance makes the output reproducible.

    Returns:
        Unique questions in order of first appearance, each ending in ``?``.
    """
    candidates = []
    for _ in range(num):
        topic = rng.choice(topics)
        primary = rng.choice(CONTEXTS)
        alternatives = [c for c in CONTEXTS if c != primary]
        # Fall back to the primary context if there is no other one to pick.
        secondary = rng.choice(alternatives if alternatives else [primary])
        template = rng.choice(TEMPLATES)
        question = template.format(topic=topic, context=primary, context2=secondary).strip()
        if not question.endswith("?"):
            question += "?"
        candidates.append(question)
    # dict keys keep insertion order, which drops repeats but preserves order.
    return list(dict.fromkeys(candidates))


# ------------------------------
# Semantic scoring (vectorized)
# ------------------------------

@dataclass
class ScoreResult:
    """Best-match similarity result for one question, as built by score_questions()."""
    # The question text that was scored.
    question: str
    # Highest similarity between this question and any of the ideals.
    best_score: float
    # Position in the ideals list of the ideal that produced best_score.
    best_ideal_idx: int

def load_model(model_name: str) -> SentenceTransformer:
    """Instantiate and return the SentenceTransformer named ``model_name``."""
    model = SentenceTransformer(model_name)
    return model

def score_questions(model: SentenceTransformer, questions: List[str], ideals: List[str]) -> List[ScoreResult]:
    """Score every question by its best cosine similarity to any ideal.

    Args:
        model: Encoder used for both the ideals and the questions.
        questions: Candidate questions to rank.
        ideals: Reference questions to compare against.

    Returns:
        One ``ScoreResult`` per question, sorted by ``best_score`` in
        descending order. An empty ``questions`` list yields an empty list
        without calling the model.
    """
    if not questions:
        return []

    encode_options = {"convert_to_tensor": True, "normalize_embeddings": True}
    ideal_emb = model.encode(ideals, **encode_options)
    q_emb = model.encode(questions, **encode_options)
    similarity = util.cos_sim(q_emb, ideal_emb)  # [Q x I]

    # Per-question best score and the index of the ideal that achieved it.
    row_best = similarity.max(dim=1).values
    row_best_idx = similarity.argmax(dim=1)

    results = []
    for i, question in enumerate(questions):
        results.append(
            ScoreResult(
                question=question,
                best_score=float(row_best[i].item()),
                best_ideal_idx=int(row_best_idx[i].item()),
            )
        )
    return sorted(results, key=lambda r: r.best_score, reverse=True)


# ------------------------------
# SVG artifact (simple list)
# ------------------------------

def render_svg_list(pairs: List[Tuple[str, float]], width: int = 960, line_height: int = 28, margin: int = 24) -> str:
    """Render ``(question, score)`` pairs as a simple SVG text list.

    Args:
        pairs: Items to render, in display order.
        width: SVG width in pixels.
        line_height: Vertical distance between consecutive rows.
        margin: Padding around the content; also the baseline of the title.

    Returns:
        The SVG document as a string, with one ``<text>`` row per pair below
        a title row. The height grows with the number of pairs.
    """
    def esc(s: str) -> str:
        # '&' is replaced first so the entities added afterwards stay intact.
        for char, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")):
            s = s.replace(char, entity)
        return s

    height = margin * 2 + line_height * len(pairs)
    text_elems = []
    for row, (q, score) in enumerate(pairs, start=1):
        line = f"{score:.3f}  {q}"
        # Row baselines start one line below the title and advance per row.
        y = margin + row * line_height
        text_elems.append(
            f'<text x="{margin}" y="{y}" font-family="Monaco, Menlo, Consolas, monospace" font-size="14">{esc(line)}</text>'
        )
    svg = f'''<svg xmlns="http://www.w3.org/2000/svg" width="{width}" height="{height}" viewBox="0 0 {width} {height}">
  <rect width="100%" height="100%" fill="#0b0f14"/>
  <g fill="#e6edf3">
    <text x="{margin}" y="{margin}" font-family="Monaco, Menlo, Consolas, monospace" font-size="16" font-weight="bold">Semantic ranking (score 0..1)</text>
    {"".join(text_elems)}
  </g>
</svg>'''
    return svg


# ------------------------------
# Manifest + bundle
# ------------------------------

def write_text(path: Path, data: str) -> None:
    """Write ``data`` to ``path`` as UTF-8, creating parent directories first.

    An existing file at ``path`` is overwritten.
    """
    target_dir = path.parent
    target_dir.mkdir(parents=True, exist_ok=True)
    path.write_text(data, encoding='utf-8')

def write_json(path: Path, obj: Any) -> None:
    """Serialize ``obj`` as 2-space-indented JSON and write it to ``path``.

    Non-ASCII characters are written as-is rather than escaped.
    """
    payload = json.dumps(obj, ensure_ascii=False, indent=2)
    write_text(path, payload)

def write_csv(path: Path, rows: List[Dict[str, Any]], fieldnames: List[str]) -> None:
    """Write ``rows`` to ``path`` as UTF-8 CSV with a header row.

    Args:
        path: Destination file; parent directories are created as needed.
        rows: One dict per CSV row, keyed by column name.
        fieldnames: Column names, in output order.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    # newline='' leaves line-ending handling to the csv module.
    with path.open('w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

def build_manifest(root: Path, files: List[Path], meta: Dict[str, Any]) -> Dict[str, Any]:
    """Build a manifest describing ``files`` relative to ``root``.

    Args:
        root: Directory the file paths are made relative to.
        files: Existing files located under ``root``.
        meta: Run metadata, embedded unchanged under the ``meta`` key.

    Returns:
        A dict with the manifest version, creation timestamp, root path,
        ``meta``, and one entry per file holding its relative path, SHA-256
        digest and size in bytes.
    """
    entries = [
        {
            "path": str(file_path.relative_to(root)),
            "sha256": sha256_file(file_path),
            "size": file_path.stat().st_size,
        }
        for file_path in files
    ]
    return {
        "version": "1.0",
        "created": now_iso(),
        "root": str(root),
        "meta": meta,
        "files": entries,
    }

def zip_dir(root: Path, out_zip: Path) -> None:
    """Create a deflate-compressed ZIP of every file under ``root``.

    Entries are stored under their path relative to ``root`` and added in
    sorted order so the archive layout does not depend on directory
    enumeration order.

    Args:
        root: Directory whose files are archived (recursively).
        out_zip: Archive to create. It may live inside ``root``; in that case
            the archive itself is left out of its own contents.
    """
    out_resolved = out_zip.resolve()
    # Collect members up front and skip the output archive: when out_zip is
    # inside root, walking the tree would otherwise pick up the archive that
    # is currently being written.
    members = sorted(
        p for p in root.rglob("*")
        if p.is_file() and p.resolve() != out_resolved
    )
    with zipfile.ZipFile(out_zip, 'w', zipfile.ZIP_DEFLATED) as zf:
        for member in members:
            zf.write(member, arcname=str(member.relative_to(root)))


# ------------------------------
# Encryption (AES-256-GCM with scrypt KDF)
# ------------------------------

def derive_key_scrypt(passphrase: str, salt: bytes, n: int = 2**14, r: int = 8, p: int = 1, key_len: int = 32) -> bytes:
    """Derive a ``key_len``-byte key from ``passphrase`` with scrypt.

    Args:
        passphrase: Secret text; it is UTF-8 encoded before derivation.
        salt: Salt passed to the KDF.
        n: scrypt ``n`` parameter.
        r: scrypt ``r`` parameter.
        p: scrypt ``p`` parameter.
        key_len: Length of the derived key in bytes.

    Returns:
        The derived key bytes.
    """
    secret = passphrase.encode('utf-8')
    return Scrypt(salt=salt, length=key_len, n=n, r=r, p=p).derive(secret)

def encrypt_bytes_aesgcm(plaintext: bytes, passphrase: str) -> bytes:
    """Encrypt ``plaintext`` with AES-GCM under a key derived from ``passphrase``.

    A fresh 16-byte salt and 12-byte nonce are generated for every call. The
    key is derived with scrypt, and everything needed for decryption except
    the passphrase is recorded in the returned envelope.

    Args:
        plaintext: Data to encrypt.
        passphrase: Secret the encryption key is derived from.

    Returns:
        UTF-8 encoded, indented JSON with a ``header`` (algorithm, KDF name,
        base64 salt and nonce, KDF parameters) and ``ciphertext_b64``.
    """
    # Single source of truth for the KDF settings: the same values are passed
    # to the derivation and written to the header, so the header cannot drift
    # from what was actually used.
    kdf_params = {"n": 2**14, "r": 8, "p": 1, "len": 32}

    salt = secrets.token_bytes(16)
    key = derive_key_scrypt(
        passphrase,
        salt,
        n=kdf_params["n"],
        r=kdf_params["r"],
        p=kdf_params["p"],
        key_len=kdf_params["len"],
    )
    nonce = secrets.token_bytes(12)
    ct = AESGCM(key).encrypt(nonce, plaintext, associated_data=None)

    header = {
        "alg": "AES-256-GCM",
        "kdf": "scrypt",
        "salt_b64": base64_encode(salt),
        "nonce_b64": base64_encode(nonce),
        "kdf_params": kdf_params,
    }
    blob = {
        "header": header,
        "ciphertext_b64": base64_encode(ct),
    }
    return json.dumps(blob, indent=2).encode('utf-8')

def base64_encode(b: bytes) -> str:
    """Return the standard base64 encoding of ``b`` as an ASCII string."""
    import base64
    encoded = base64.b64encode(b)
    return str(encoded, 'ascii')


# ------------------------------
# CLI
# ------------------------------

def main():
    """Run the generate → score → write artifacts → bundle → encrypt pipeline.

    Steps: parse the CLI, load ideals, generate template-based candidate
    questions, score them against the ideals, write ``out.csv``, ``out.json``,
    ``top.txt``, ``out.svg``, ``plan.json`` and ``manifest.yaml`` into the
    output directory, optionally append the top questions to the ideals file,
    optionally zip the output directory, and optionally encrypt that zip.

    Exits with status 2 when ``--encrypt`` is given without ``--bundle``, when
    ``--topics`` contains no usable topic, or when no non-empty passphrase is
    available for encryption.
    """
    ap = argparse.ArgumentParser(description="Generate → score → bundle → encrypt pipeline for philosophical questions.")
    ap.add_argument("--topics", type=str, default=None, help="Comma-separated topics (default: AGI, artificial intelligence, ...)")
    ap.add_argument("--num", type=int, default=40, help="Number of candidate questions to generate")
    ap.add_argument("--model", type=str, default="sentence-transformers/all-MiniLM-L6-v2", help="SentenceTransformer model name")
    ap.add_argument("--ideals-file", type=Path, default=None, help="Path to ideals.txt (one per line). Defaults to built-ins if not given")
    ap.add_argument("--topk", type=int, default=15, help="Number of top results to include prominently and for feedback")
    ap.add_argument("--seed", type=int, default=None, help="RNG seed for reproducibility")
    ap.add_argument("--outdir", type=Path, required=True, help="Output directory for artifacts")
    ap.add_argument("--bundle", action="store_true", help="Create a ZIP bundle of artifacts")
    ap.add_argument("--encrypt", action="store_true", help="Encrypt the bundle with AES-GCM (requires --bundle)")
    ap.add_argument("--passphrase", type=str, default=None, help="Passphrase to encrypt the bundle (if omitted, read from stdin hidden)")
    ap.add_argument("--update-ideals", action="store_true", help="Append top-K back into ideals file")
    args = ap.parse_args()

    # Reject invalid invocations before loading the model or writing anything.
    if args.encrypt and not args.bundle:
        print("Error: --encrypt requires --bundle", file=sys.stderr)
        sys.exit(2)
    topics = parse_topics(args.topics)
    if not topics:
        print("Error: --topics did not contain any non-empty topic", file=sys.stderr)
        sys.exit(2)

    # Pick the seed here (same range seed_rng would use) so that an
    # auto-generated seed ends up in the metadata and the run can be repeated.
    seed = args.seed if args.seed is not None else secrets.randbelow(2**31)
    rng = seed_rng(seed)
    outdir: Path = args.outdir
    outdir.mkdir(parents=True, exist_ok=True)

    # Load ideals
    ideals = load_ideals(args.ideals_file)
    if not ideals:
        print("No ideals found; using defaults.")
        ideals = DEFAULT_IDEALS[:]

    # Generate candidates
    questions = generate_questions(topics, args.num, rng)

    # Load model and score
    print(f"Loading model: {args.model}")
    model = load_model(args.model)
    print("Scoring questions...")
    scored = score_questions(model, questions, ideals)

    # Prepare rows and top-K (at least one entry is kept even if --topk < 1)
    ranked_pairs = [(r.question, r.best_score) for r in scored]
    top_pairs = ranked_pairs[: max(1, args.topk)]

    # Write artifacts
    meta = {
        "created": now_iso(),
        "model": args.model,
        "topics": topics,
        "num_generated": len(questions),
        "num_ranked": len(scored),
        "topk": args.topk,
        "seed": seed,
        "ideals_file": str(args.ideals_file) if args.ideals_file else "(builtin)",
    }

    # out.csv
    csv_rows = [
        {"rank": rank, "score": f"{s:.6f}", "question": q}
        for rank, (q, s) in enumerate(ranked_pairs, 1)
    ]
    write_csv(outdir / "out.csv", csv_rows, fieldnames=["rank", "score", "question"])

    # out.json
    write_json(outdir / "out.json", {
        "meta": meta,
        "ranked": [{"rank": i + 1, "score": s, "question": q} for i, (q, s) in enumerate(ranked_pairs)]
    })

    # top.txt
    write_text(outdir / "top.txt", "\n".join([f"[{s:.3f}] {q}" for q, s in top_pairs]))

    # svg
    svg = render_svg_list(top_pairs)
    write_text(outdir / "out.svg", svg)

    # plan.json (simple)
    write_json(outdir / "plan.json", {
        "goal": "Generate and rank philosophically-rich questions by semantic alignment to ideals.",
        "steps": [
            "Generate template-based candidates",
            "Encode with SentenceTransformer",
            "Vectorized cosine similarity vs ideals",
            "Sort by best match",
            "Emit artifacts and manifest",
            "Optionally bundle and encrypt"
        ]
    })

    # manifest.yaml covers the artifacts written above (not itself).
    files = [
        outdir / "out.csv",
        outdir / "out.json",
        outdir / "top.txt",
        outdir / "out.svg",
        outdir / "plan.json",
    ]
    manifest = build_manifest(outdir, files, meta)
    write_text(outdir / "manifest.yaml", yaml.safe_dump(manifest, sort_keys=False))
    files.append(outdir / "manifest.yaml")

    # Feedback loop
    if args.update_ideals:
        target_file = args.ideals_file or (outdir / "ideals.txt")
        # Skip questions the target file already contains so repeated runs do
        # not pile up duplicate ideals.
        already_present = set(load_ideals(Path(target_file)))
        append_lines = [q for q, _ in top_pairs if q not in already_present]
        append_ideals(Path(target_file), append_lines)
        print(f"Appended top-{len(append_lines)} questions to {target_file}")

    # Bundle (and, if requested, encrypt the bundle)
    if args.bundle:
        bundle_name = f"bundle-{slugify(meta['model'])}-{int(time.time())}.zip"
        bundle_path = outdir / bundle_name
        zip_dir(outdir, bundle_path)
        print(f"Wrote bundle: {bundle_path}")

        if args.encrypt:
            # Get passphrase
            passphrase = args.passphrase
            if not passphrase:
                import getpass
                try:
                    passphrase = getpass.getpass("Enter passphrase for encryption: ")
                except Exception:
                    print("Error: passphrase not provided and cannot prompt. Use --passphrase.", file=sys.stderr)
                    sys.exit(2)
            if not passphrase:
                # An empty passphrase would produce a file that looks
                # protected but offers no real protection.
                print("Error: empty passphrase; refusing to encrypt.", file=sys.stderr)
                sys.exit(2)
            with bundle_path.open('rb') as f:
                plaintext = f.read()
            enc_bytes = encrypt_bytes_aesgcm(plaintext, passphrase)
            enc_path = bundle_path.with_suffix(bundle_path.suffix + ".enc.json")
            with enc_path.open('wb') as f:
                f.write(enc_bytes)
            print(f"Encrypted bundle: {enc_path} (JSON envelope with AES-GCM)")

    # Console summary
    print("\nTop results:")
    for i, (q, s) in enumerate(top_pairs, 1):
        print(f"[{s:.3f}] {i}. {q}")

# Run the CLI only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()