<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/meta_intelligence_end_to_end_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-

"""
meta_intelligence_end_to_end.py

A notebook/Colab-safe end-to-end script for training, evaluating, and
predicting with a compact MLP on synthetic blobs, supporting:
- Self-supervised (consistency) training
- Supervised training
- Hybrid (self + supervised) training

Artifacts:
- <run_dir>/config.json
- <run_dir>/metrics.csv
- <run_dir>/model.pt
- <run_dir>/metrics.png        (if matplotlib available)
- <run_dir>/boundary.png       (if dim == 2 and matplotlib available)
"""

import argparse
import csv
import json
import math
import os
import random
import sys
import time
from dataclasses import dataclass, asdict
from typing import List, Optional, Tuple

import numpy as np

# Optional plotting (script runs without it)
try:
    import matplotlib.pyplot as plt
    _HAS_MPL = True
except Exception:
    _HAS_MPL = False

import torch
import torch.nn as nn
import torch.nn.functional as F


# ----------------------------
# Notebook-safe CLI utilities
# ----------------------------

def sanitize_argv(argv: Optional[List[str]] = None) -> List[str]:
    """Return ``argv`` without the arguments a Jupyter/Colab kernel injects.

    Two kinds of tokens are removed:

    * ``-f`` together with the token that follows it (the kernel connection
      file), and
    * any token ending in ``.json`` that also contains ``jupyter`` or
      ``kernel``.

    Args:
        argv: Argument list to clean. ``None`` means ``sys.argv[1:]``.

    Returns:
        A new list with the remaining tokens in their original order.
    """
    raw = sys.argv[1:] if argv is None else argv
    kept: List[str] = []
    tokens = iter(raw)
    for token in tokens:
        if token == "-f":
            # Consume (and drop) the value that belongs to -f, if any.
            next(tokens, None)
            continue
        is_kernel_file = token.endswith(".json") and ("jupyter" in token or "kernel" in token)
        if not is_kernel_file:
            kept.append(token)
    return kept


# ----------------------------
# Reproducibility and misc
# ----------------------------

def set_seed(seed: int) -> None:
    """Seed Python, NumPy and PyTorch RNGs and request deterministic cuDNN.

    Args:
        seed: Value passed to every random number generator.
    """
    # cuDNN flags first; they are independent of the seeding calls below.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

    for seeder in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)


def ts() -> str:
    """Return the current local time formatted as ``YYYYMMDD-HHMMSS``."""
    stamp_format = "%Y%m%d-%H%M%S"
    return time.strftime(stamp_format, time.localtime())


def ensure_dir(path: str) -> None:
    """Create directory ``path`` (including missing parents) if it is absent.

    An already existing directory is left untouched.
    """
    os.makedirs(name=path, exist_ok=True)


# ----------------------------
# Data generation (synthetic blobs)
# ----------------------------

@dataclass
class DataConfig:
    """Parameters of the synthetic blob dataset.

    Field order is part of the interface: callers construct this class
    positionally as ``DataConfig(dim, n_classes)``.
    """

    # Number of feature columns requested for each sample.
    dim: int = 2
    # Number of target classes.
    n_classes: int = 2
    # Distance of each blob centre from the origin in the first two dimensions.
    radius: float = 3.0
    # Standard deviation of the Gaussian scatter around a blob centre.
    spread: float = 1.0
    # Standard deviation of the noise-only columns added for dims > 2.
    extra_noise: float = 0.5


def make_dataset(n_samples: int, cfg: "DataConfig", seed: int) -> Tuple[np.ndarray, np.ndarray]:
    """Sample Gaussian blobs whose centres are evenly spaced on a circle.

    The first two feature columns carry the class signal: class ``k`` is
    centred at angle ``2*pi*k / cfg.n_classes`` on a circle of radius
    ``cfg.radius`` and scattered with standard deviation ``cfg.spread``.
    When ``cfg.dim > 2`` the remaining ``cfg.dim - 2`` columns are zero-mean
    Gaussian noise with standard deviation ``cfg.extra_noise``.

    For ``cfg.n_classes == 2`` the centres (angles 0 and pi), the class sizes
    (``n // 2`` and ``n - n // 2``) and the order of random draws are the same
    as in the earlier two-class-only generator, so existing seeds reproduce
    the same data.

    Args:
        n_samples: Total number of points to generate (>= 0).
        cfg: Object exposing ``dim``, ``n_classes``, ``radius``, ``spread``
            and ``extra_noise`` (normally a ``DataConfig``).
        seed: Seed for the NumPy generator used for sampling and shuffling.

    Returns:
        ``(X, y)`` where ``X`` is a float32 array with ``n_samples`` rows and
        ``y`` is an int64 array of class ids; both are shuffled with the same
        permutation. ``X`` always has at least two columns, even if
        ``cfg.dim`` is smaller than 2.

    Raises:
        ValueError: If ``n_samples`` is negative or ``cfg.n_classes < 1``.
    """
    n_classes = cfg.n_classes
    if n_classes < 1:
        raise ValueError(f"n_classes must be >= 1, got {n_classes}.")
    if n_samples < 0:
        raise ValueError(f"n_samples must be >= 0, got {n_samples}.")

    rng = np.random.default_rng(seed)

    # Split n_samples as evenly as possible. Leftover samples go to the LAST
    # classes so that the two-class case stays (n // 2, n - n // 2).
    base, leftover = divmod(n_samples, n_classes)
    counts = [base + (1 if k >= n_classes - leftover else 0) for k in range(n_classes)]

    blobs, labels = [], []
    for k, count in enumerate(counts):
        angle = 2.0 * math.pi * k / n_classes
        center = np.array(
            [cfg.radius * math.cos(angle), cfg.radius * math.sin(angle)],
            dtype=np.float32,
        )
        # Draw order (class 0, class 1, ...) matters for seed reproducibility.
        blobs.append(rng.normal(0, cfg.spread, size=(count, 2)).astype(np.float32) + center)
        labels.append(np.full(count, k, dtype=np.int64))

    X = np.vstack(blobs)
    if cfg.dim > 2:
        # Pad with uninformative noise columns.
        extra = rng.normal(0, cfg.extra_noise, size=(n_samples, cfg.dim - 2)).astype(np.float32)
        X = np.hstack([X, extra])

    y = np.concatenate(labels, axis=0)

    idx = rng.permutation(n_samples)
    return X[idx], y[idx]


def split_dataset(X: np.ndarray, y: np.ndarray, val_ratio: float, test_ratio: float, seed: int):
    """Randomly partition ``(X, y)`` into train, validation and test subsets.

    A seeded permutation of the row indices is cut into three consecutive
    pieces: the first ``int(test_ratio * n)`` rows form the test set, the next
    ``int(val_ratio * n)`` rows the validation set, and the rest the training
    set.

    Returns:
        ``(X_train, y_train, X_val, y_val, X_test, y_test)``.
    """
    n_rows = X.shape[0]
    order = np.random.default_rng(seed).permutation(n_rows)

    test_end = int(test_ratio * n_rows)
    val_end = test_end + int(val_ratio * n_rows)
    test_idx, val_idx, train_idx = np.split(order, [test_end, val_end])

    parts = []
    for subset in (train_idx, val_idx, test_idx):
        parts.extend((X[subset], y[subset]))
    return tuple(parts)


def to_tensor(x: np.ndarray, y: Optional[np.ndarray], device: torch.device):
    """Copy NumPy features (and optional labels) into tensors on ``device``.

    Args:
        x: Feature array; converted to ``torch.float32``.
        y: Label array converted to ``torch.long``, or ``None``.
        device: Target device for both tensors.

    Returns:
        ``(x_tensor, y_tensor)``; ``y_tensor`` is ``None`` when ``y`` is ``None``.
    """
    features = torch.tensor(x, dtype=torch.float32, device=device)
    if y is None:
        return features, None
    targets = torch.tensor(y, dtype=torch.long, device=device)
    return features, targets


def augment_noise(x: torch.Tensor, sigma: float = 0.25) -> torch.Tensor:
    """Return ``x`` perturbed by i.i.d. standard-normal noise scaled by ``sigma``.

    The input tensor is not modified; a new tensor of the same shape is returned.
    """
    noise = torch.randn_like(x)
    return x + sigma * noise


# ----------------------------
# Model
# ----------------------------

class MLP(nn.Module):
    """Two-hidden-layer perceptron with ReLU activations producing class logits.

    The layers live in ``self.net`` (an ``nn.Sequential``) as
    Linear -> ReLU -> Linear -> ReLU -> Linear, so parameter names are
    ``net.0.*``, ``net.2.*`` and ``net.4.*``.
    """

    def __init__(self, in_dim: int, n_classes: int, hidden: int = 64):
        """Build the network.

        Args:
            in_dim: Number of input features.
            n_classes: Number of output logits.
            hidden: Width of both hidden layers.
        """
        super().__init__()
        # Linear layers are created in network order so that seeded
        # initialisation is reproducible.
        first = nn.Linear(in_dim, hidden)
        second = nn.Linear(hidden, hidden)
        head = nn.Linear(hidden, n_classes)
        self.net = nn.Sequential(
            first,
            nn.ReLU(inplace=True),
            second,
            nn.ReLU(inplace=True),
            head,
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return unnormalised class scores for the batch ``x``."""
        logits = self.net(x)
        return logits


# ----------------------------
# Loss helpers and metrics
# ----------------------------

def entropy_from_logits(logits: torch.Tensor) -> torch.Tensor:
    """Return the batch-mean Shannon entropy (in nats) of ``softmax(logits)``.

    Probabilities are clamped to at least ``1e-12`` before taking the log so
    that zero-probability classes do not produce ``nan``.
    """
    probs = logits.softmax(dim=-1).clamp_min(1e-12)
    plogp = probs * probs.log()
    neg_entropy_per_row = plogp.sum(dim=-1)
    return (-neg_entropy_per_row).mean()


def cross_entropy_soft_targets(logits: torch.Tensor, target_probs: torch.Tensor) -> torch.Tensor:
    """Return the batch-mean cross-entropy between soft targets and ``logits``.

    Args:
        logits: Unnormalised scores; the last dimension indexes classes.
        target_probs: Target distribution with the same shape as ``logits``.
    """
    log_probs = logits.log_softmax(dim=-1)
    weighted = target_probs * log_probs
    per_row = weighted.sum(dim=-1)
    return (-per_row).mean()


def one_hot(n_classes: int, labels: torch.Tensor) -> torch.Tensor:
    """Encode integer ``labels`` as float one-hot vectors of width ``n_classes``."""
    encoded = F.one_hot(labels, num_classes=n_classes)
    return encoded.float()


def label_smooth(target_one_hot: torch.Tensor, smoothing: float) -> torch.Tensor:
    """Blend one-hot targets with the uniform distribution over classes.

    Args:
        target_one_hot: Targets whose last dimension indexes classes.
        smoothing: Weight given to the uniform distribution. Values ``<= 0``
            disable smoothing and the input tensor itself is returned.

    Returns:
        ``(1 - smoothing) * target_one_hot + smoothing / K`` where ``K`` is the
        size of the last dimension.
    """
    if smoothing <= 0.0:
        return target_one_hot
    num_classes = target_one_hot.shape[-1]
    uniform = torch.full_like(target_one_hot, 1.0 / num_classes)
    keep = 1.0 - smoothing
    return keep * target_one_hot + smoothing * uniform


def consistency_kl(logits_a: torch.Tensor, logits_b: torch.Tensor) -> torch.Tensor:
    """Return the batch-mean symmetric KL divergence between two predictions.

    Both logit tensors are turned into probabilities (clamped to at least
    ``1e-8``); the result is ``0.5 * mean(KL(a || b) + KL(b || a))``.
    """

    def _kl(p: torch.Tensor, q: torch.Tensor) -> torch.Tensor:
        # Per-row KL(p || q) over the class dimension.
        return (p * (p.log() - q.log())).sum(dim=-1)

    probs_a = logits_a.softmax(dim=-1).clamp_min(1e-8)
    probs_b = logits_b.softmax(dim=-1).clamp_min(1e-8)
    forward = _kl(probs_a, probs_b)
    backward = _kl(probs_b, probs_a)
    return 0.5 * (forward + backward).mean()


@torch.no_grad()
def accuracy(logits: torch.Tensor, y: torch.Tensor) -> float:
    """Return the fraction of rows whose arg-max logit equals the label ``y``."""
    predicted = logits.argmax(dim=-1)
    hits = (predicted == y).float()
    return hits.mean().item()


# ----------------------------
# Training + evaluation
# ----------------------------

@dataclass
class TrainConfig:
    """All settings of one training run; serialised verbatim to ``config.json``.

    Field order is part of the interface (positional construction and the key
    order of ``asdict``), so it must not change.
    """

    # Training objective: "self", "supervised" or "hybrid".
    mode: str
    # Number of optimisation steps.
    steps: int
    # Mini-batch size drawn at every step.
    batch_size: int
    # Optimiser learning rate.
    lr: float
    # Optimiser weight decay.
    wd: float
    # Weight of the entropy term subtracted from the loss.
    entropy_bonus: float
    # Amount passed to label_smooth() for the supervised targets.
    label_sharpen: float
    # Base random seed of the run.
    seed: int
    # Parent directory in which the run directory is created.
    outdir: str
    # Logging/validation interval in steps.
    log_every: int
    # Input dimensionality.
    dim: int
    # Number of output classes.
    n_classes: int
    # Base dataset size (the training code adds a fixed surplus on top).
    train_size: int
    # Fraction of the generated data used for validation.
    val_ratio: float
    # Fraction of the generated data used for testing.
    test_ratio: float
    # "cpu", "cuda", or anything else for automatic selection.
    device: str = "auto"

    def device_obj(self) -> torch.device:
        """Resolve ``self.device`` to a ``torch.device``.

        ``"cpu"`` and ``"cuda"`` are honoured as given; any other value picks
        CUDA when it is available and falls back to the CPU otherwise.
        """
        if self.device in ("cpu", "cuda"):
            return torch.device(self.device)
        chosen = "cuda" if torch.cuda.is_available() else "cpu"
        return torch.device(chosen)


def make_run_dir(cfg: TrainConfig) -> str:
    """Create a fresh run directory under ``cfg.outdir`` and store the config.

    The directory is named ``<timestamp>_<mode>_seed<seed>_dim<dim>`` and
    receives a ``config.json`` holding every field of ``cfg``.

    Returns:
        Path of the created run directory.
    """
    leaf = f"{ts()}_{cfg.mode}_seed{cfg.seed}_dim{cfg.dim}"
    run_dir = os.path.join(cfg.outdir, leaf)
    ensure_dir(run_dir)

    config_path = os.path.join(run_dir, "config.json")
    with open(config_path, "w") as handle:
        json.dump(asdict(cfg), handle, indent=2)
    return run_dir


def train_run(cfg: TrainConfig) -> str:
    """Train an MLP on synthetic blobs as described by ``cfg`` and save artifacts.

    Steps performed:

    1. Seed all RNGs, resolve the device and create the run directory
       (which also writes ``config.json``).
    2. Generate ``cfg.train_size + 2048`` samples and split them into
       train/validation/test parts.
    3. Run ``cfg.steps`` AdamW updates on mini-batches sampled with
       replacement from the training part. Depending on ``cfg.mode`` the loss
       is the supervised soft-target cross-entropy, the consistency KL between
       two noisy views, or their sum; ``cfg.entropy_bonus`` times the
       prediction entropy is subtracted from it.
    4. Append a row to ``metrics.csv`` (and print it) at step 1, every
       ``cfg.log_every`` steps and at the final step.
    5. Save ``model.pt`` and, if matplotlib is available, the metric and
       decision-boundary plots.

    Args:
        cfg: Complete training configuration.

    Returns:
        Path of the run directory containing all artifacts.
    """
    set_seed(cfg.seed)
    device = cfg.device_obj()
    run_dir = make_run_dir(cfg)

    # Data: generate, then split with a different seed than the generator.
    X, y = make_dataset(cfg.train_size + 2048, DataConfig(cfg.dim, cfg.n_classes), seed=cfg.seed)
    Xtr, ytr, Xval, yval, Xte, yte = split_dataset(X, y, cfg.val_ratio, cfg.test_ratio, seed=cfg.seed + 1)
    Xtr_t, ytr_t = to_tensor(Xtr, ytr, device)
    Xval_t, yval_t = to_tensor(Xval, yval, device)

    model = MLP(cfg.dim, cfg.n_classes, hidden=64).to(device)
    opt = torch.optim.AdamW(model.parameters(), lr=cfg.lr, weight_decay=cfg.wd)

    # Start a fresh metrics file containing only the header row.
    csv_path = os.path.join(run_dir, "metrics.csv")
    with open(csv_path, "w", newline="") as f:
        csv.writer(f).writerow(["step", "loss", "sup", "ssl", "entropy", "val_acc"])

    n = Xtr_t.shape[0]
    for step in range(1, cfg.steps + 1):
        model.train()
        # Sample a mini-batch with replacement (NumPy global RNG, seeded above).
        idx = np.random.randint(0, n, size=(cfg.batch_size,))
        xb = Xtr_t[idx]
        yb = ytr_t[idx]

        logits = model(xb)

        # Both loss parts default to zero so the inactive one drops out of the sum.
        loss_sup = torch.tensor(0.0, device=device)
        loss_ssl = torch.tensor(0.0, device=device)

        if cfg.mode in ("supervised", "hybrid"):
            y1h = one_hot(cfg.n_classes, yb)
            # cfg.label_sharpen is used as the label-smoothing amount here.
            ysoft = label_smooth(y1h, cfg.label_sharpen)
            loss_sup = cross_entropy_soft_targets(logits, ysoft)

        if cfg.mode in ("self", "hybrid"):
            # Two independently noised views of the same batch should agree.
            xa = augment_noise(xb, 0.25)
            xb2 = augment_noise(xb, 0.25)
            la = model(xa)
            lb = model(xb2)
            loss_ssl = consistency_kl(la, lb)

        # Entropy of the clean-batch predictions; subtracting it rewards
        # higher entropy when cfg.entropy_bonus is positive.
        ent = entropy_from_logits(logits)
        total = loss_sup + loss_ssl - cfg.entropy_bonus * ent

        opt.zero_grad(set_to_none=True)
        total.backward()
        opt.step()

        # Log on the first step, on every log_every-th step and on the last step.
        if step % cfg.log_every == 0 or step in (1, cfg.steps):
            model.eval()
            with torch.no_grad():
                val_acc = accuracy(model(Xval_t), yval_t)
            with open(csv_path, "a", newline="") as f:
                csv.writer(f).writerow([
                    step,
                    f"{float(total):.6f}",
                    f"{float(loss_sup):.6f}",
                    f"{float(loss_ssl):.6f}",
                    f"{float(ent):.6f}",
                    f"{val_acc:.4f}",
                ])
            print(f"[{step:5d}/{cfg.steps}] mode={cfg.mode} "
                  f"loss={float(total):.4f} sup={float(loss_sup):.4f} ssl={float(loss_ssl):.4f} "
                  f"ent={float(ent):.4f} val_acc={val_acc:.3f}")

    # Save the weights together with the metadata needed to rebuild the model
    # (input size, class count and the full training config).
    torch.save({
        "model_state": model.state_dict(),
        "in_dim": cfg.dim,
        "n_classes": cfg.n_classes,
        "config": asdict(cfg),
    }, os.path.join(run_dir, "model.pt"))

    # Optional plots; the held-out test split is only used for the boundary plot.
    if _HAS_MPL:
        try_plot_metrics(csv_path, os.path.join(run_dir, "metrics.png"))
        if cfg.dim == 2:
            try_plot_boundary(model, Xte, yte, os.path.join(run_dir, "boundary.png"), device)

    print(f"Done. Outputs saved to: {run_dir}")
    return run_dir


@torch.no_grad()
def eval_run(ckpt_path: str) -> None:
    """Load a checkpoint and report its accuracy on newly generated blobs.

    The evaluation set has 4096 samples and is generated with the training
    seed stored in the checkpoint plus 999. For 2-D inputs (and when
    matplotlib is available) a ``boundary_eval.png`` is written next to the
    checkpoint file.

    Args:
        ckpt_path: Path to a ``model.pt`` file produced by training.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # NOTE(review): depending on the installed PyTorch version torch.load may
    # unpickle arbitrary objects - only open checkpoints you trust.
    payload = torch.load(ckpt_path, map_location=device)
    in_dim, n_classes = payload["in_dim"], payload["n_classes"]
    train_cfg = payload["config"]

    # Rebuild the network with the same hidden width used for training.
    model = MLP(in_dim, n_classes, hidden=64).to(device)
    model.load_state_dict(payload["model_state"])
    model.eval()

    eval_seed = train_cfg["seed"] + 999
    X, y = make_dataset(4096, DataConfig(in_dim, n_classes), seed=eval_seed)
    Xt, yt = to_tensor(X, y, device)
    acc = accuracy(model(Xt), yt)
    print(f"Eval accuracy on fresh synthetic test set: {acc:.4f}")

    # Plot boundary if 2D
    if not (_HAS_MPL and in_dim == 2):
        return
    out_png = os.path.join(os.path.dirname(ckpt_path), "boundary_eval.png")
    try_plot_boundary(model, X, y, out_png, device)


@torch.no_grad()
def predict_run(ckpt_path: str, points_csv: Optional[str], points_inline: Optional[str]) -> None:
    """Predict classes for user-supplied points and print the result as JSON.

    Points come from ``points_csv`` when it is given, otherwise from
    ``points_inline``. The printed JSON is a list with one object per point:
    ``{"x": <coordinates>, "pred": <class id>, "probs": <class probabilities>}``.

    Args:
        ckpt_path: Path to a ``model.pt`` file produced by training.
        points_csv: Header-less CSV file with one point per row (needs pandas).
        points_inline: Points written as ``"x1,x2; y1,y2; ..."``. Empty chunks
            such as the one after a trailing ``;`` are ignored.

    Raises:
        ValueError: If neither source is given, if a point (inline or CSV)
            does not have exactly as many values as the model has inputs, or
            if no point is left after parsing.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # NOTE(review): depending on the installed PyTorch version torch.load may
    # unpickle arbitrary objects - only open checkpoints you trust.
    payload = torch.load(ckpt_path, map_location=device)
    in_dim = payload["in_dim"]
    n_classes = payload["n_classes"]

    model = MLP(in_dim, n_classes, hidden=64).to(device)
    model.load_state_dict(payload["model_state"])
    model.eval()

    pts = []
    if points_csv:
        import pandas as pd  # optional; pip install pandas if needed
        df = pd.read_csv(points_csv, header=None)
        # Validate the width here, like the inline branch does, instead of
        # failing later with an opaque shape error inside the model.
        if df.shape[1] != in_dim:
            raise ValueError(f"Each point must have {in_dim} values.")
        pts = df.values.astype(np.float32).tolist()
    elif points_inline:
        # format: "x1,x2; y1,y2; ..."
        for chunk in points_inline.split(";"):
            chunk = chunk.strip()
            if not chunk:
                # Tolerate a trailing or duplicated ';'.
                continue
            row = [float(t) for t in chunk.split(",") if t.strip() != ""]
            if len(row) != in_dim:
                raise ValueError(f"Each point must have {in_dim} values.")
            pts.append(row)
    else:
        raise ValueError("Provide --points-csv or --points-inline.")

    if not pts:
        # An empty batch would otherwise reach the model as a 1-D tensor.
        raise ValueError("No points were provided.")

    Xt = torch.tensor(np.array(pts, dtype=np.float32), device=device)
    logits = model(Xt)
    probs = logits.softmax(dim=-1).cpu().numpy()
    preds = probs.argmax(axis=-1).tolist()

    out = [{"x": p, "pred": int(c), "probs": [float(q) for q in pr]} for p, c, pr in zip(pts, preds, probs)]
    print(json.dumps(out, indent=2))


# ----------------------------
# Plotting helpers
# ----------------------------

def try_plot_metrics(csv_path: str, out_png: str) -> None:
    """Plot every metric column of ``csv_path`` against the step into ``out_png``.

    Nothing is drawn (and no file is written) when the CSV file does not exist
    or contains no data rows. Requires the module-level ``plt`` (matplotlib).
    """
    if not os.path.exists(csv_path):
        return

    # (CSV column, legend label) in plotting order.
    curves = (
        ("loss", "total"),
        ("sup", "supervised"),
        ("ssl", "self"),
        ("entropy", "entropy"),
        ("val_acc", "val_acc"),
    )
    steps = []
    series = {column: [] for column, _ in curves}
    with open(csv_path, "r") as f:
        for record in csv.DictReader(f):
            steps.append(int(record["step"]))
            for column, _ in curves:
                series[column].append(float(record[column]))
    if not steps:
        return

    fig, ax = plt.subplots(1, 1, figsize=(7, 4))
    for column, legend in curves:
        ax.plot(steps, series[column], label=legend)
    ax.set_title("Training metrics")
    ax.set_xlabel("step")
    ax.grid(True, alpha=0.3)
    ax.legend()
    fig.tight_layout()
    fig.savefig(out_png, dpi=140)
    plt.close(fig)


def try_plot_boundary(model: nn.Module, X: np.ndarray, y: np.ndarray, out_png: str, device: torch.device) -> None:
    """Save a filled contour of ``P(class=1)`` with the points ``X`` on top.

    The function returns without doing anything unless ``X`` has exactly two
    columns. The contour is evaluated on a 400 x 400 grid that extends the
    data range by 1.5 on every side. Requires the module-level ``plt``.
    """
    if X.shape[1] != 2:
        return

    margin = 1.5
    x_lo, x_hi = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_lo, y_hi = X[:, 1].min() - margin, X[:, 1].max() + margin
    xs = np.linspace(x_lo, x_hi, 400)
    ys = np.linspace(y_lo, y_hi, 400)
    xx, yy = np.meshgrid(xs, ys)
    grid = np.c_[xx.ravel(), yy.ravel()].astype(np.float32)

    with torch.no_grad():
        class_probs = model(torch.tensor(grid, device=device)).softmax(dim=-1)
        Z = class_probs[:, 1].cpu().numpy().reshape(xx.shape)

    fig, ax = plt.subplots(1, 1, figsize=(6, 5))
    filled = ax.contourf(xx, yy, Z, levels=30, cmap="coolwarm", alpha=0.8)
    fig.colorbar(filled, ax=ax, label="P(class=1)")
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap="bwr", edgecolor="k", s=12, alpha=0.8)
    ax.set_title("Decision boundary")
    ax.set_xlabel("x1")
    ax.set_ylabel("x2")
    ax.grid(True, alpha=0.2)
    fig.tight_layout()
    fig.savefig(out_png, dpi=140)
    plt.close(fig)


# ----------------------------
# CLI
# ----------------------------

def build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser with ``train``, ``eval`` and ``predict``.

    The chosen sub-command is stored in ``args.cmd`` (``None`` when no
    sub-command is given).
    """
    root = argparse.ArgumentParser(description="MetaIntelligence end-to-end toolkit (notebook/Colab-safe)")
    commands = root.add_subparsers(dest="cmd")

    # Train
    train = commands.add_parser("train", help="Train a model (self/supervised/hybrid)")
    train_options = (
        ("--mode", {"choices": ["self", "supervised", "hybrid"], "default": "supervised"}),
        ("--steps", {"type": int, "default": 500}),
        ("--batch-size", {"type": int, "default": 128}),
        ("--lr", {"type": float, "default": 1e-3}),
        ("--wd", {"type": float, "default": 0.0}),
        ("--entropy-bonus", {"type": float, "default": 0.0}),
        ("--label-sharpen", {"type": float, "default": 0.0}),
        ("--seed", {"type": int, "default": 42}),
        ("--outdir", {"type": str, "default": "runs"}),
        ("--log-every", {"type": int, "default": 50}),
        ("--dim", {"type": int, "default": 2}),
        ("--n-classes", {"type": int, "default": 2}),
        ("--train-size", {"type": int, "default": 8192}),
        ("--val-ratio", {"type": float, "default": 0.15}),
        ("--test-ratio", {"type": float, "default": 0.15}),
        ("--device", {"type": str, "default": "auto", "choices": ["auto", "cpu", "cuda"]}),
    )
    for flag, spec in train_options:
        train.add_argument(flag, **spec)

    # Eval
    evaluate = commands.add_parser("eval", help="Evaluate a saved checkpoint on a fresh synthetic test set")
    evaluate.add_argument("--ckpt", type=str, required=True, help="Path to model.pt")

    # Predict
    predict = commands.add_parser("predict", help="Predict for given points")
    predict.add_argument("--ckpt", type=str, required=True, help="Path to model.pt")
    predict.add_argument("--points-csv", type=str, default=None,
                         help="CSV file without header, each row is a point")
    predict.add_argument("--points-inline", type=str, default=None,
                         help='Inline points, e.g., "x1,x2; y1,y2" (must match input dim)')

    return root


def main(argv: Optional[List[str]] = None) -> None:
    """Command-line entry point: parse arguments and run the chosen command.

    Args:
        argv: Arguments to parse; ``None`` means the process arguments.
            Kernel-injected arguments are removed first, and unknown
            arguments are ignored by the parser. With no arguments or no
            recognised sub-command the help text is printed.
    """
    cleaned = sanitize_argv(argv)
    parser = build_parser()
    if not cleaned:
        parser.print_help()
        return

    args, _unknown = parser.parse_known_args(cleaned)
    command = args.cmd

    if command == "eval":
        eval_run(args.ckpt)
        return
    if command == "predict":
        predict_run(args.ckpt, args.points_csv, args.points_inline)
        return
    if command != "train":
        parser.print_help()
        return

    # Every TrainConfig field has an identically named parsed argument.
    field_names = (
        "mode", "steps", "batch_size", "lr", "wd", "entropy_bonus", "label_sharpen", "seed",
        "outdir", "log_every", "dim", "n_classes", "train_size", "val_ratio", "test_ratio", "device",
    )
    cfg = TrainConfig(**{name: getattr(args, name) for name in field_names})
    train_run(cfg)


if __name__ == "__main__":
    # Script entry point: run the CLI on the process arguments.
    try:
        main()
    except SystemExit:
        # Friendlier in notebooks: print an exit hint to stderr, then re-raise
        # so the original exit status is preserved.
        print("Use 'exit', 'quit', or Ctrl-D to exit.", file=sys.stderr)
        raise