<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/train_cosmic_entity_ai_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
"""
train_cosmic_entity_ai.py

End‐to‐end pipeline for CosmicEntityAI:
1. Synthetic “cosmic entity” dataset of 6 inputs → 3 targets
2. Float32 normalization and dtype consistency
3. MLP with LayerNorm, Dropout & ReLU
4. Physics‐informed residual enforcing toy transformation laws
5. MC‐Dropout for uncertainty quantification
6. Training loop with AdamW, ReduceLROnPlateau, gradient clipping, NaN checks, early stopping
7. Safe checkpoint loading
8. Visualizations: training history, true vs. predicted scatter, uncertainty heatmap
"""

import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, random_split

# ------------------------------------------------------------------------------
# 1. Synthetic Cosmic Entity Dataset
# ------------------------------------------------------------------------------
class CosmicEntityDataset(Dataset):
    """Synthetic regression dataset: 6 "cosmic entity" inputs -> 3 targets.

    Input columns of ``X`` (drawn independently and uniformly):
        0    RC    reality control parameter, in [0.1, 10)
        1    OP    omniscience probability, in [0, 1)
        2-5  SEF1..SEF4  synthetic factors, in [0.1, 5)

    Target columns of ``Y`` (toy "transformation laws" plus Gaussian noise
    whose scale is 1% of each target's standard deviation):
        0    TE = RC * OP / (SEF1 + eps)
        1    OE = (SEF1 * SEF2 * SEF3 * SEF4) ** 0.25 * RC
        2    RP = log1p(RC) * OP

    ``X`` and ``Y`` are stored standardized (zero mean, unit std per column)
    as float32; the statistics used for that are kept in float64 in
    ``X_mean``, ``X_std``, ``Y_mean`` and ``Y_std`` so callers can
    denormalize.

    Args:
        n_samples: number of rows to generate.
        seed: seed for the private random generator. The same seed always
            produces the same dataset.
    """

    def __init__(self, n_samples=5000, seed=42):
        # A private RandomState yields the same stream as the legacy
        # ``np.random.seed(seed)`` + ``np.random.*`` calls, but does not
        # reseed the process-wide NumPy generator as a side effect.
        rng = np.random.RandomState(seed)

        # Features in double precision (draw order matters for reproducibility)
        RC   = rng.uniform(0.1, 10.0, (n_samples, 1))   # Reality control param
        OP   = rng.rand(n_samples, 1)                   # Omniscience probability
        SEF1 = rng.uniform(0.1, 5.0, (n_samples, 1))    # Synthetic factor 1
        SEF2 = rng.uniform(0.1, 5.0, (n_samples, 1))    # Synthetic factor 2
        SEF3 = rng.uniform(0.1, 5.0, (n_samples, 1))    # Synthetic factor 3
        SEF4 = rng.uniform(0.1, 5.0, (n_samples, 1))    # Synthetic factor 4

        X_raw = np.hstack([RC, OP, SEF1, SEF2, SEF3, SEF4]).astype(np.float64)

        # Toy "transformation laws" targets
        eps = 1e-6  # keeps the TE denominator away from zero
        # 1. Transformation Entropy (TE)
        TE = RC * OP / (SEF1 + eps)
        # 2. Omniversal Emergence (OE): geometric mean of the factors times RC
        OE = (SEF1 * SEF2 * SEF3 * SEF4)**0.25 * RC
        # 3. Restructuring Power (RP)
        RP = np.log1p(RC) * OP

        Y_raw = np.hstack([TE, OE, RP]).astype(np.float64)
        # Additive noise at 1% of each target's spread
        Y_raw += 0.01 * Y_raw.std(axis=0) * rng.randn(*Y_raw.shape)

        # Compute normalization stats (float64); the small constant guards
        # against division by zero for a constant column.
        self.X_mean = X_raw.mean(axis=0)
        self.X_std  = X_raw.std(axis=0) + 1e-8
        self.Y_mean = Y_raw.mean(axis=0)
        self.Y_std  = Y_raw.std(axis=0) + 1e-8

        # Standardize and cast to float32
        self.X = ((X_raw - self.X_mean) / self.X_std).astype(np.float32)
        self.Y = ((Y_raw - self.Y_mean) / self.Y_std).astype(np.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the standardized ``(x, y)`` pair at ``idx`` as torch tensors."""
        return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])


# ------------------------------------------------------------------------------
# 2. CosmicEntityAI Model Definition
# ------------------------------------------------------------------------------
class CosmicEntityAI(nn.Module):
    """Fully connected regressor.

    Each hidden stage is Linear -> LayerNorm -> ReLU -> Dropout; a final
    Linear layer maps the last hidden width to ``output_dim``. All layers
    live in ``self.net`` (an ``nn.Sequential``).

    Args:
        input_dim: number of input features.
        hidden_dims: width of each hidden stage, in order.
        output_dim: number of regression outputs.
        p_drop: dropout probability used in every hidden stage.
    """

    def __init__(self, input_dim=6, hidden_dims=(64,64), output_dim=3, p_drop=0.1):
        super().__init__()
        # widths[i] -> widths[i + 1] describes the i-th hidden stage.
        widths = [input_dim, *hidden_dims]
        stages = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stages.extend((
                nn.Linear(fan_in, fan_out),
                nn.LayerNorm(fan_out),
                nn.ReLU(),
                nn.Dropout(p_drop),
            ))
        # Output head reads from the last hidden width (or the input itself
        # when no hidden stages were requested).
        stages.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the raw outputs."""
        return self.net(x)


# ------------------------------------------------------------------------------
# 3. Physics-Informed Residual Loss
# ------------------------------------------------------------------------------
def physics_residual(pred, X, stats):
    """Mean squared gap between ``pred`` and the noise-free toy laws.

    The standardized inputs ``X`` are mapped back to raw units, the three
    closed-form targets (TE, OE, RP) are recomputed from them, standardized
    with the target statistics, and compared with ``pred``.

    Args:
        pred: model outputs in standardized target space, one row per sample.
        X: standardized inputs; columns are RC, OP and four synthetic factors.
        stats: mapping with tensors under 'X_mean', 'X_std', 'Y_mean', 'Y_std'.

    Returns:
        Scalar tensor holding the mean squared error.
    """
    eps = 1e-6

    # Back to raw units, then one 1-D tensor per input column.
    raw = X * stats['X_std'] + stats['X_mean']
    rc, op, s1, s2, s3, s4 = raw.unbind(dim=1)

    te_law = rc * op / (s1 + eps)
    oe_law = (s1 * s2 * s3 * s4) ** 0.25 * rc
    rp_law = torch.log1p(rc) * op

    law = torch.stack((te_law, oe_law, rp_law), dim=1)
    law_norm = (law - stats['Y_mean']) / stats['Y_std']
    return nn.functional.mse_loss(pred, law_norm)


# ------------------------------------------------------------------------------
# 4. Combined Loss Function
# ------------------------------------------------------------------------------
def total_loss(pred, true, X, stats, λ=1.0):
    """Combine the data-fit loss with the physics residual.

    Args:
        pred: model outputs in standardized target space.
        true: standardized ground-truth targets.
        X: standardized inputs, forwarded to ``physics_residual``.
        stats: normalization statistics, forwarded to ``physics_residual``.
        λ: weight applied to the physics term.

    Returns:
        Tuple ``(combined, data_mse, physics)`` where
        ``combined = data_mse + λ * physics``.
    """
    data_term = nn.functional.mse_loss(pred, true)
    physics_term = physics_residual(pred, X, stats)
    combined = data_term + λ * physics_term
    return combined, data_term, physics_term


# ------------------------------------------------------------------------------
# 5. MC-Dropout Uncertainty Quantification
# ------------------------------------------------------------------------------
def mc_dropout_predict(model, X, T=50):
    """Estimate predictive mean and spread with Monte-Carlo dropout.

    The model is switched to training mode so dropout stays active, ``T``
    stochastic forward passes are run without gradient tracking, and the
    per-element mean and standard deviation across passes are returned.
    Every module's previous train/eval flag is restored before returning
    (also when a forward pass raises), so later inference is not silently
    left stochastic.

    Note that training mode is applied to the whole model, so any other
    mode-dependent layer is affected during the passes as well.

    Args:
        model: module to sample from.
        X: input batch passed unchanged to ``model`` on every pass.
        T: number of stochastic passes; use at least 2 for a meaningful
            standard deviation.

    Returns:
        Tuple ``(mean, std)``, each shaped like a single ``model(X)`` output.
    """
    # Snapshot per-module flags so mixed train/eval configurations survive.
    prior_modes = [(module, module.training) for module in model.modules()]
    model.train()
    try:
        with torch.no_grad():
            stacked = torch.stack([model(X) for _ in range(T)], dim=0)
    finally:
        for module, was_training in prior_modes:
            module.training = was_training
    return stacked.mean(0), stacked.std(0)


# ------------------------------------------------------------------------------
# 6. Training Loop with NaN Safety & Checkpointing
# ------------------------------------------------------------------------------
def train(model, train_loader, val_loader, stats, device,
          lr=1e-4, wd=1e-5, λ=1.0, epochs=100, patience=10,
          ckpt_path="best_cosmic_ai.pth"):
    """Fit ``model`` with the combined data + physics loss.

    Uses AdamW, halves the learning rate when the validation loss plateaus
    (ReduceLROnPlateau, patience 5), clips the gradient norm to 1.0, and
    stops early after ``patience`` epochs without a validation improvement
    of more than 1e-6. The weights of the best validation epoch are written
    to ``ckpt_path`` and reloaded into ``model`` before returning.

    Args:
        model: network to train; moved to ``device`` in place.
        train_loader: iterable of ``(X, Y)`` training batches.
        val_loader: iterable of ``(X, Y)`` validation batches.
        stats: normalization statistics forwarded to ``total_loss``.
        device: device the model and batches are moved to.
        lr: AdamW learning rate.
        wd: AdamW weight decay.
        λ: weight of the physics term, forwarded to ``total_loss``.
        epochs: maximum number of epochs.
        patience: early-stopping patience in epochs.
        ckpt_path: file the best weights are saved to.

    Returns:
        Dict with per-epoch loss lists under 'train' and 'val'. An epoch
        that is aborted because of a non-finite loss is not recorded.
    """
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5
    )

    best_val, wait = float('inf'), 0
    history = {'train': [], 'val': []}
    saved_this_run = False  # never reload a checkpoint left by an earlier run
    aborted = False

    for epoch in range(1, epochs+1):
        # Training step
        model.train()
        run_train, seen_train = 0.0, 0
        for Xb, Yb in train_loader:
            Xb, Yb = Xb.to(device), Yb.to(device)
            pred = model(Xb)
            loss, _, _ = total_loss(pred, Yb, Xb, stats, λ)
            # isfinite catches +/-inf as well as NaN; checking before
            # backward() keeps the bad step out of the optimizer.
            if not torch.isfinite(loss):
                print(f"Non-finite loss at epoch {epoch}, abort.")
                aborted = True
                break
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            run_train += loss.item() * Xb.size(0)
            seen_train += Xb.size(0)
        if aborted:
            # Leave the epoch loop but still restore the best weights below.
            break
        # Average over the samples actually processed, which stays correct
        # for loaders that drop or resample batches.
        train_loss = run_train / max(seen_train, 1)

        # Validation step
        model.eval()
        run_val, seen_val = 0.0, 0
        with torch.no_grad():
            for Xv, Yv in val_loader:
                Xv, Yv = Xv.to(device), Yv.to(device)
                pred = model(Xv)
                l, _, _ = total_loss(pred, Yv, Xv, stats, λ)
                run_val += l.item() * Xv.size(0)
                seen_val += Xv.size(0)
        val_loss = run_val / max(seen_val, 1)

        scheduler.step(val_loss)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        print(f"Epoch {epoch:03d} | Train {train_loss:.4e} | Val {val_loss:.4e}")

        # Checkpointing
        if val_loss < best_val - 1e-6:
            best_val, wait = val_loss, 0
            torch.save(model.state_dict(), ckpt_path)
            saved_this_run = True
        else:
            wait += 1
            if wait >= patience:
                print("Early stopping.")
                break

    # Safe load best model: only a file written during this call is read
    # back, so a stale or incompatible checkpoint on disk is never picked up.
    if saved_this_run and os.path.exists(ckpt_path):
        model.load_state_dict(torch.load(ckpt_path, map_location=device))
    return history


# ------------------------------------------------------------------------------
# 7. Visualization Helpers
# ------------------------------------------------------------------------------
def plot_history(history):
    """Draw the per-epoch training and validation loss curves on one figure.

    Args:
        history: mapping with loss sequences under 'train' and 'val'.
    """
    plt.figure()
    for key, label in (('train', 'Train'), ('val', 'Val')):
        plt.plot(history[key], label=label)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

def plot_scatter(y_true, y_pred, title):
    """Scatter predictions against ground truth with a dashed y = x line.

    Args:
        y_true: ground-truth values; also sets the extent of the reference line.
        y_pred: predicted values, same length as ``y_true``.
        title: figure title.
    """
    lo, hi = y_true.min(), y_true.max()
    diagonal = [lo, hi]

    plt.figure()
    plt.scatter(y_true, y_pred, s=6, alpha=0.5)
    plt.plot(diagonal, diagonal, 'r--')  # perfect-prediction reference
    plt.title(title)
    plt.show()

def plot_uncertainty_heatmap(model, stats, device):
    """Plot the MC-dropout standard deviation of output 0 over an RC x OP grid.

    RC is swept over [0.1, 10] and OP over [0, 1] on a 100 x 100 grid while
    the four remaining inputs are held at their dataset means. The grid is
    standardized with ``stats`` and evaluated with 40 MC-dropout passes.

    Args:
        model: trained network.
        stats: mapping with tensors under 'X_mean' and 'X_std'.
        device: device on which the grid tensor is built.
    """
    G = 100
    n_points = G * G

    # Axes and mesh of the two swept inputs (raw units).
    rc_axis = np.linspace(0.1, 10.0, G, dtype=np.float32)
    op_axis = np.linspace(0.0, 1.0,  G, dtype=np.float32)
    rc_mesh, op_mesh = np.meshgrid(rc_axis, op_axis)

    # Raw-unit design matrix: swept columns 0-1, columns 2-5 fixed at mean.
    grid = torch.zeros((n_points, 6), device=device, dtype=torch.float32)
    grid[:, 0] = torch.from_numpy(rc_mesh.ravel()).to(device)
    grid[:, 1] = torch.from_numpy(op_mesh.ravel()).to(device)
    grid[:, 2:] = stats['X_mean'][2:]

    grid_norm = (grid - stats['X_mean']) / stats['X_std']
    _, spread = mc_dropout_predict(model, grid_norm, T=40)
    # Row-major flattening above means a plain reshape recovers the mesh layout.
    te_std = spread[:, 0].cpu().reshape(G, G)

    plt.figure(figsize=(5,4))
    plt.pcolormesh(rc_mesh, op_mesh, te_std, cmap='magma', shading='auto')
    plt.colorbar(label="Std(TE)")
    plt.xlabel("Reality Control (RC)")
    plt.ylabel("Omniscience Prob (OP)")
    plt.title("Uncertainty: Transformation Entropy")
    plt.show()


# ------------------------------------------------------------------------------
# 8. Main Execution
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Seed torch so the train/val split, weight initialization, batch
    # shuffling and dropout masks are repeatable from run to run, in line
    # with the fixed seed already used for the dataset.
    torch.manual_seed(42)

    # Prepare dataset and stats
    ds = CosmicEntityDataset(n_samples=5000, seed=42)
    # Normalization statistics as float32 tensors on the training device
    stats = {
        'X_mean': torch.tensor(ds.X_mean, dtype=torch.float32, device=device),
        'X_std' : torch.tensor(ds.X_std,  dtype=torch.float32, device=device),
        'Y_mean': torch.tensor(ds.Y_mean, dtype=torch.float32, device=device),
        'Y_std' : torch.tensor(ds.Y_std,  dtype=torch.float32, device=device),
    }

    # Split into train/val (80/20)
    val_n = int(0.2 * len(ds))
    tr_ds, va_ds = random_split(ds, [len(ds)-val_n, val_n])
    tr_ld = DataLoader(tr_ds, batch_size=128, shuffle=True)
    va_ld = DataLoader(va_ds, batch_size=256, shuffle=False)

    # Build, train, evaluate
    model   = CosmicEntityAI().to(device)
    history = train(model, tr_ld, va_ld, stats, device)

    # Visualize training history
    plot_history(history)

    # Scatter: true vs. predicted
    # Force eval mode: if training aborted mid-epoch the model is still in
    # train mode and dropout would make these point predictions noisy.
    model.eval()
    X_all = torch.from_numpy(ds.X).to(device)
    with torch.no_grad():
        Yp_n = model(X_all).cpu().numpy()
    # Map standardized targets and predictions back to raw units
    Yt = ds.Y * ds.Y_std + ds.Y_mean
    Yp = Yp_n * ds.Y_std + ds.Y_mean
    names = ["Transformation Entropy", "Omniversal Emergence", "Restructuring Power"]
    for i, nm in enumerate(names):
        plot_scatter(Yt[:, i], Yp[:, i], nm)

    # Plot uncertainty heatmap
    plot_uncertainty_heatmap(model, stats, device)