<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/train_absolute_being_ai_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
"""
train_absolute_being_ai.py

End-to-end pipeline for AbsoluteBeingAI:
1. Synthetic “absolute being” dataset of 6 inputs → 3 targets
2. Float32 normalization and dtype consistency
3. MLP with LayerNorm, Dropout & ReLU (accepts int hidden_dims)
4. Physics-informed residual enforcing toy godhood laws
5. MC-Dropout for uncertainty quantification
6. Training loop with AdamW, ReduceLROnPlateau, grad clipping, NaN checks, early stopping
7. Safe checkpoint loading
8. Visualizations: loss curves, scatter plots, uncertainty heatmap
"""

import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, random_split

# -----------------------------------------------------------------------------
# 1. Synthetic Absolute Being Dataset
# -----------------------------------------------------------------------------
class AbsoluteBeingDataset(Dataset):
    """Synthetic regression dataset: 6 "absolute being" inputs -> 3 toy targets.

    Input columns (in order), each sampled uniformly:
        TEP: trans-existential power,       [1e3, 1e6]
        US : omniversal stability,          [0, 1)
        SSI: self-sustaining intelligence,  [0.1, 10]
        DIM: dimensional integration,       [0, 1)
        EF : ethereal flux,                 [0, 5]
        CF : cosmic fidelity,               [0.1, 1]

    Target columns (in order), with eps = 1e-6:
        OP = TEP * US / (SSI + eps)
        EA = SSI * CF / (EF + eps)
        IC = (DIM + US) * CF

    Gaussian noise is added to the targets, then inputs and targets are
    standardized column-wise and stored as float32 in ``self.X`` / ``self.Y``.
    The float64 statistics used for the standardization are kept in
    ``X_mean``, ``X_std``, ``Y_mean`` and ``Y_std`` so callers can
    denormalize.

    Args:
        n_samples: number of rows to generate.
        seed: seed of the private random generator used for sampling.
    """

    def __init__(self, n_samples=5000, seed=42):
        # A private RandomState produces the same stream as
        # ``np.random.seed(seed)`` followed by the module-level sampling
        # functions, but does not reseed the process-wide NumPy RNG behind
        # the caller's back.
        rng = np.random.RandomState(seed)

        TEP = rng.uniform(1e3, 1e6,  (n_samples, 1))
        US  = rng.rand(n_samples, 1)
        SSI = rng.uniform(0.1, 10.0, (n_samples, 1))
        DIM = rng.rand(n_samples, 1)
        EF  = rng.uniform(0.0, 5.0,  (n_samples, 1))
        CF  = rng.uniform(0.1, 1.0,  (n_samples, 1))

        X_raw = np.hstack([TEP, US, SSI, DIM, EF, CF]).astype(np.float64)

        # Toy godhood targets; eps keeps the denominators away from zero.
        eps = 1e-6
        OP = TEP * US / (SSI + eps)
        EA = SSI * CF / (EF + eps)
        IC = (DIM + US) * CF

        Y_raw = np.hstack([OP, EA, IC]).astype(np.float64)
        # Additive Gaussian noise whose std is 1% of each target column's std
        # (i.e. scaled per column, not relative to each individual value).
        Y_raw += 0.01 * Y_raw.std(axis=0) * rng.randn(*Y_raw.shape)

        # Statistics are computed in float64; the small constant guards
        # against division by zero for a constant column.
        self.X_mean = X_raw.mean(axis=0)
        self.X_std  = X_raw.std(axis=0) + 1e-8
        self.Y_mean = Y_raw.mean(axis=0)
        self.Y_std  = Y_raw.std(axis=0) + 1e-8

        # Standardize, then cast to float32 so tensors match the model dtype.
        self.X = ((X_raw - self.X_mean) / self.X_std).astype(np.float32)
        self.Y = ((Y_raw - self.Y_mean) / self.Y_std).astype(np.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(inputs, targets)`` pair at ``idx`` as torch tensors."""
        return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])


# -----------------------------------------------------------------------------
# 2. AbsoluteBeingAI Model Definition
# -----------------------------------------------------------------------------
class AbsoluteBeingAI(nn.Module):
    """Feed-forward regressor built from Linear/LayerNorm/ReLU/Dropout stages.

    Args:
        input_dim: number of input features.
        hidden_dims: width of each hidden stage; a single int is treated as
            one hidden stage of that width.
        output_dim: number of regression outputs.
        p_drop: dropout probability used after every hidden stage.
    """

    def __init__(self, input_dim=6, hidden_dims=(64,64),
                 output_dim=3, p_drop=0.1):
        super().__init__()
        # A bare int means "one hidden stage".
        widths = (hidden_dims,) if isinstance(hidden_dims, int) else hidden_dims

        modules = []
        fan_in = input_dim
        for width in widths:
            modules.append(nn.Linear(fan_in, width))
            modules.append(nn.LayerNorm(width))
            modules.append(nn.ReLU())
            modules.append(nn.Dropout(p_drop))
            fan_in = width
        # Final projection to the targets; no activation on the output.
        modules.append(nn.Linear(fan_in, output_dim))

        self.net = nn.Sequential(*modules)

    def forward(self, x):
        """Run ``x`` through the stacked network and return its output."""
        return self.net(x)


# -----------------------------------------------------------------------------
# 3. Physics-Informed Residual Loss
# -----------------------------------------------------------------------------
def physics_residual(pred, X, stats):
    """Mean-squared gap between ``pred`` and the analytic toy-law targets.

    The normalized inputs ``X`` are mapped back to raw units with
    ``stats['X_mean']`` / ``stats['X_std']``, the three closed-form targets
    are recomputed from them, re-normalized with ``stats['Y_mean']`` /
    ``stats['Y_std']``, and compared to ``pred`` with an MSE.

    Args:
        pred: model output in normalized target space.
        X: normalized inputs whose six columns are TEP, US, SSI, DIM, EF, CF.
        stats: mapping with keys 'X_mean', 'X_std', 'Y_mean', 'Y_std'.

    Returns:
        Scalar tensor holding the mean squared error.
    """
    eps = 1e-6  # same denominator guard as used when generating the data
    criterion = nn.MSELoss()

    # Undo the input standardization and split into one tensor per feature.
    raw_inputs = X * stats['X_std'] + stats['X_mean']
    tep, us, ssi, dim_int, ef, cf = raw_inputs.t()

    # Closed-form toy laws, stacked column-wise in target order (OP, EA, IC).
    law_targets = torch.stack(
        [
            tep * us / (ssi + eps),
            ssi * cf / (ef + eps),
            (dim_int + us) * cf,
        ],
        dim=1,
    )

    # Compare in the normalized space the network is trained in.
    law_targets_norm = (law_targets - stats['Y_mean']) / stats['Y_std']
    return criterion(pred, law_targets_norm)


# -----------------------------------------------------------------------------
# 4. Total Loss
# -----------------------------------------------------------------------------
def total_loss(pred, true, X, stats, lam=1.0):
    """Combine the data-fit MSE with the weighted physics residual.

    Args:
        pred: model output.
        true: reference targets compared to ``pred`` with an MSE.
        X: inputs forwarded to ``physics_residual``.
        stats: normalization statistics forwarded to ``physics_residual``.
        lam: weight applied to the physics term.

    Returns:
        Tuple ``(data_mse + lam * physics, data_mse, physics)``.
    """
    criterion = nn.MSELoss()
    data_term = criterion(pred, true)
    physics_term = physics_residual(pred, X, stats)
    combined = data_term + lam * physics_term
    return combined, data_term, physics_term


# -----------------------------------------------------------------------------
# 5. MC-Dropout Uncertainty
# -----------------------------------------------------------------------------
def mc_dropout_predict(model, X, T=50):
    """Estimate predictive mean and spread with Monte-Carlo dropout.

    The model is run ``T`` times on ``X`` in train mode (so dropout layers
    stay stochastic) with gradients disabled, and the per-element mean and
    standard deviation over those passes are returned.

    The model's training flag is restored afterwards, so calling this on a
    model that is in eval mode does not silently leave dropout enabled for
    later inference.

    Args:
        model: module to sample from.
        X: input batch passed to ``model`` unchanged on every pass.
        T: number of stochastic forward passes; must be at least 1. With
            ``T == 1`` the returned std is NaN, because ``Tensor.std`` uses
            the unbiased estimator by default.

    Returns:
        Tuple ``(mean, std)`` reduced over the ``T`` passes.

    Raises:
        ValueError: if ``T`` is smaller than 1.
    """
    if T < 1:
        raise ValueError(f"T must be a positive integer, got {T!r}")

    was_training = model.training
    model.train()  # keep dropout active while sampling
    try:
        with torch.no_grad():
            samples = torch.stack([model(X) for _ in range(T)], dim=0)
    finally:
        # Put the module back in whatever mode the caller had it in.
        model.train(was_training)

    return samples.mean(dim=0), samples.std(dim=0)


# -----------------------------------------------------------------------------
# 6. Training Loop
# -----------------------------------------------------------------------------
def train(model, tr_loader, va_loader, stats, device,
          lr=1e-4, wd=1e-5, lam=1.0,
          epochs=100, patience=10,
          ckpt_path="best_absolute_being_ai.pth"):
    """Train ``model`` with AdamW, plateau LR decay and early stopping.

    Each epoch runs one pass over ``tr_loader`` (with gradient-norm clipping
    at 1.0) and one evaluation pass over ``va_loader``. The validation loss
    drives both the ``ReduceLROnPlateau`` scheduler and early stopping.
    Whenever the validation loss improves by more than 1e-6 the weights are
    written to ``ckpt_path``; before returning, the best weights written
    *during this call* are loaded back into ``model``.

    Args:
        model: network to optimize; moved to ``device`` in place.
        tr_loader: iterable of ``(X, Y)`` training batches exposing a
            ``dataset`` attribute with a length.
        va_loader: same, for validation.
        stats: normalization statistics forwarded to ``total_loss``.
        device: device batches and the model are moved to.
        lr: AdamW learning rate.
        wd: AdamW weight decay.
        lam: weight of the physics term in ``total_loss``.
        epochs: maximum number of epochs.
        patience: epochs without validation improvement before stopping.
        ckpt_path: file the best weights are saved to.

    Returns:
        Dict with per-epoch loss lists under the keys 'train' and 'val'.
    """
    model.to(device)
    opt = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    sched = optim.lr_scheduler.ReduceLROnPlateau(
        opt, mode='min', factor=0.5, patience=5)

    best_val, wait = float('inf'), 0
    # Only restore a checkpoint this call produced; a file left at ckpt_path
    # by an earlier run may hold unrelated (or incompatible) weights.
    saved_this_run = False
    aborted = False
    history = {'train': [], 'val': []}

    for ep in range(1, epochs+1):
        # train
        model.train()
        run_tr = 0.0
        for Xb, Yb in tr_loader:
            Xb, Yb = Xb.to(device), Yb.to(device)
            pred = model(Xb)
            loss, _, _ = total_loss(pred, Yb, Xb, stats, lam)
            # Catch Inf as well as NaN: either one means training diverged.
            if not torch.isfinite(loss):
                print("Non-finite loss; aborting.")
                aborted = True
                break
            opt.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
            # Weight by batch size so the epoch figure is a per-sample mean.
            run_tr += loss.item() * Xb.size(0)
        if aborted:
            break
        tr_loss = run_tr / len(tr_loader.dataset)

        # validate
        model.eval()
        run_va = 0.0
        with torch.no_grad():
            for Xb, Yb in va_loader:
                Xb, Yb = Xb.to(device), Yb.to(device)
                pred = model(Xb)
                loss, _, _ = total_loss(pred, Yb, Xb, stats, lam)
                run_va += loss.item() * Xb.size(0)
        va_loss = run_va / len(va_loader.dataset)

        sched.step(va_loss)
        history['train'].append(tr_loss)
        history['val'].append(va_loss)
        print(f"Epoch {ep:03d} | Train {tr_loss:.4e} | Val {va_loss:.4e}")

        # checkpoint & early stopping
        if va_loss < best_val - 1e-6:
            best_val, wait = va_loss, 0
            torch.save(model.state_dict(), ckpt_path)
            saved_this_run = True
        else:
            wait += 1
            if wait >= patience:
                print("Early stopping.")
                break

    # Load the best weights from this run (also after an abort, so the
    # caller gets the last good checkpoint rather than diverged weights).
    if saved_this_run and os.path.exists(ckpt_path):
        model.load_state_dict(torch.load(ckpt_path, map_location=device))
    return history


# -----------------------------------------------------------------------------
# 7. Visualization Helpers
# -----------------------------------------------------------------------------
def plot_history(hist):
    """Plot the per-epoch loss curves stored under 'train' and 'val'."""
    plt.figure()
    for key, label in (('train', 'Train'), ('val', 'Val')):
        plt.plot(hist[key], label=label)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()


def plot_scatter(y_true, y_pred, title):
    """Scatter ``y_pred`` against ``y_true`` with a dashed y = x reference.

    The reference line spans the minimum to the maximum of ``y_true``.
    """
    plt.figure()
    plt.scatter(y_true, y_pred, s=5, alpha=0.6)
    # End points of the identity line, taken from the range of the truth.
    ends = [y_true.min(), y_true.max()]
    plt.plot(ends, ends, 'r--')
    plt.title(title)
    plt.show()


def plot_uncertainty_heatmap(model, stats, device, grid=100, T=100):
    """Plot the MC-dropout std of the first output over a TEP x US grid.

    TEP is swept over [1e3, 1e6] and US over [0, 1]; the remaining four
    inputs are held at their dataset means taken from ``stats['X_mean']``.
    The grid is normalized with ``stats`` and passed to
    ``mc_dropout_predict``; the standard deviation of output column 0 is
    drawn as a heatmap.

    The model's training flag is restored after sampling so this plotting
    helper does not change how later inference behaves.

    Args:
        model: trained network to probe.
        stats: mapping with 'X_mean' and 'X_std' tensors; assumed to be
            length-6 tensors already on ``device`` (TODO confirm at caller).
        device: device on which the grid is built and evaluated.
        grid: number of points along each axis.
        T: number of stochastic forward passes.
    """
    # vary TEP vs US
    tep_axis = np.linspace(1e3, 1e6, grid, dtype=np.float32)
    us_axis  = np.linspace(0.0, 1.0, grid, dtype=np.float32)
    G1, G2 = np.meshgrid(tep_axis, us_axis)
    pts = grid * grid

    Xg = torch.zeros((pts, 6), device=device, dtype=torch.float32)
    # fix SSI, DIM, EF, CF at mean
    Xg[:, 2:] = stats['X_mean'][2:].unsqueeze(0).expand(pts, 4)
    Xg[:, 0] = torch.from_numpy(G1.ravel()).to(device)
    Xg[:, 1] = torch.from_numpy(G2.ravel()).to(device)

    Xn = (Xg - stats['X_mean']) / stats['X_std']

    was_training = model.training
    try:
        _, std = mc_dropout_predict(model, Xn, T=T)
    finally:
        # Sampling needs train mode; hand the model back as we found it.
        model.train(was_training)

    # Hand matplotlib a plain NumPy array rather than a torch tensor.
    U = std[:, 0].detach().cpu().numpy().reshape(G1.shape)

    plt.figure(figsize=(5,4))
    plt.pcolormesh(G1, G2, U, shading='auto', cmap='magma')
    plt.colorbar(label="Std(OP)")
    plt.xlabel("Trans-existential Power (TEP)")
    plt.ylabel("Omniversal Stability (US)")
    plt.title("Uncertainty: Omnipotence")
    plt.show()


# -----------------------------------------------------------------------------
# 8. Main Execution
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # The dataset is seeded through NumPy; seed torch as well so the
    # train/val split, weight initialization, batch shuffling and dropout
    # masks are repeatable from run to run.
    torch.manual_seed(42)

    ds = AbsoluteBeingDataset(n_samples=5000, seed=42)
    # Normalization statistics as float32 tensors on the training device.
    stats = {
        'X_mean': torch.tensor(ds.X_mean, dtype=torch.float32, device=device),
        'X_std' : torch.tensor(ds.X_std,  dtype=torch.float32, device=device),
        'Y_mean': torch.tensor(ds.Y_mean, dtype=torch.float32, device=device),
        'Y_std' : torch.tensor(ds.Y_std,  dtype=torch.float32, device=device),
    }

    # 80/20 train/validation split.
    n_val = int(0.2 * len(ds))
    tr_ds, va_ds = random_split(ds, [len(ds)-n_val, n_val])
    tr_ld = DataLoader(tr_ds, batch_size=128, shuffle=True)
    va_ld = DataLoader(va_ds, batch_size=256, shuffle=False)

    model   = AbsoluteBeingAI().to(device)
    history = train(model, tr_ld, va_ld, stats, device)

    plot_history(history)

    # Point predictions must be made with dropout disabled; set eval mode
    # explicitly instead of relying on the mode train() happened to leave.
    model.eval()
    X_all = torch.from_numpy(ds.X).to(device)
    with torch.no_grad():
        Yp_norm = model(X_all).cpu().numpy()
    # Map normalized targets and predictions back to raw units.
    Y_true = ds.Y * ds.Y_std + ds.Y_mean
    Y_pred = Yp_norm * ds.Y_std + ds.Y_mean
    names = ["Omnipotence", "Autonomy", "Intelligence Coherence"]
    for i, nm in enumerate(names):
        plot_scatter(Y_true[:,i], Y_pred[:,i], nm)

    plot_uncertainty_heatmap(model, stats, device)