<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/train_post_intelligence_ai_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
"""
train_post_intelligence_ai.py

1. Generate synthetic PostIntelligence data (6 inputs → 3 targets)
2. Normalize in float32
3. Define PostIntelligenceAI with Dropout
4. Enforce toy physics residual in loss
5. Train with AdamW, LR scheduler, grad clipping, early stopping
6. Save best model, visualize training & uncertainty
"""

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt

# -----------------------------------------------------------------------------
# 1. Synthetic Dataset
# -----------------------------------------------------------------------------
class PostIntelligenceDataset(Dataset):
    """Synthetic regression dataset with 6 inputs and 3 targets, both standardized.

    Inputs (columns of ``X``): F, S, E, A, B, C, each drawn uniformly from a
    fixed range.  Targets (columns of ``Y``):

    1. ``flux = F**2 / (S + eps)``
    2. ``coh  = sin(E) + A * B``
    3. ``eq   = C * (flux + coh)``

    Gaussian noise with a standard deviation of 1% of each target's spread is
    added before normalization.

    Attributes:
        X, Y: float32 arrays of shape ``(n_samples, 6)`` / ``(n_samples, 3)``
            holding the standardized inputs and targets.
        X_mean, X_std, Y_mean, Y_std: float32 per-column statistics used for
            the standardization (the ``*_std`` values include ``eps``).

    Args:
        n_samples: number of rows to generate.
        seed: seed for the private random generator.
    """

    def __init__(self, n_samples=5000, seed=42):
        # A private legacy generator produces the same stream as
        # ``np.random.seed(seed)`` followed by ``np.random.*`` calls, but it
        # does not reseed the process-wide NumPy RNG as a side effect.
        rng = np.random.RandomState(seed)

        # Inputs: cognitive flux F, self-emerging S, stability E, plus A, B, C
        F = rng.uniform(0.0, 2.0, (n_samples, 1))
        S = rng.uniform(0.1, 5.0, (n_samples, 1))
        E = rng.uniform(-3.0, 3.0, (n_samples, 1))
        A = rng.uniform(0.0, 10.0, (n_samples, 1))
        B = rng.uniform(-1.0, 1.0, (n_samples, 1))
        C = rng.uniform(0.5, 2.0, (n_samples, 1))
        X_raw = np.hstack([F, S, E, A, B, C]).astype(np.float32)

        # Toy ground truth definitions:
        # 1) flux_metric = F^2 / (S + eps)
        # 2) coherence_metric = sin(E) + A * B
        # 3) self_emerge_eq = C * (flux_metric + coherence_metric)
        eps = 1e-6
        flux = F**2 / (S + eps)
        coh = np.sin(E) + A * B
        eq = C * (flux + coh)
        Y_raw = np.hstack([flux, coh, eq]).astype(np.float32)

        # Add small noise, scaled per target column.
        noise_scale = 0.01 * Y_raw.std(axis=0)
        Y_raw += (noise_scale * rng.randn(*Y_raw.shape)).astype(np.float32)

        # Normalization stats (float32); eps keeps the divisor strictly positive.
        self.X_mean = X_raw.mean(axis=0).astype(np.float32)
        self.X_std = X_raw.std(axis=0).astype(np.float32) + eps
        self.Y_mean = Y_raw.mean(axis=0).astype(np.float32)
        self.Y_std = Y_raw.std(axis=0).astype(np.float32) + eps

        # Standardize inputs and targets.
        self.X = ((X_raw - self.X_mean) / self.X_std).astype(np.float32)
        self.Y = ((Y_raw - self.Y_mean) / self.Y_std).astype(np.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(input, target)`` pair at ``idx`` as torch tensors."""
        return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

# -----------------------------------------------------------------------------
# 2. Model with Dropout
# -----------------------------------------------------------------------------
class PostIntelligenceAI(nn.Module):
    """One-hidden-layer MLP: Linear -> ReLU -> Dropout -> Linear.

    Args:
        input_dim: number of input features.
        hidden_dim: width of the hidden layer.
        output_dim: number of predicted targets.
        p_drop: dropout probability applied after the activation.
    """

    def __init__(self, input_dim=6, hidden_dim=32, output_dim=3, p_drop=0.1):
        super().__init__()
        # Layers are created in forward order so that parameter initialisation
        # and the ``net.<index>`` state-dict keys stay the same.
        hidden = nn.Linear(input_dim, hidden_dim)
        activation = nn.ReLU(inplace=True)
        dropout = nn.Dropout(p_drop)
        head = nn.Linear(hidden_dim, output_dim)
        self.net = nn.Sequential(hidden, activation, dropout, head)

    def forward(self, x):
        """Run ``x`` through the network and return its output."""
        out = self.net(x)
        return out

# -----------------------------------------------------------------------------
# 3. Physics‐Informed Residual
# -----------------------------------------------------------------------------
def physics_residual(pred, X, stats):
    """Return the MSE between ``pred`` and the noise-free toy targets.

    The targets are recomputed from the de-normalized inputs using the same
    closed-form definitions as the dataset, then re-normalized.

    Args:
        pred: model output, compared against the normalized targets.
        X: normalized inputs whose six columns are F, S, E, A, B, C.
        stats: mapping with ``'X_mean'``, ``'X_std'``, ``'Y_mean'`` and
            ``'Y_std'`` entries used for (de)normalization.

    Returns:
        Scalar tensor holding the mean squared error.
    """
    tiny = 1e-6

    # Back to physical units, one 1-D tensor per input column.
    raw_inputs = X * stats['X_std'] + stats['X_mean']
    F, S, E, A, B, C = raw_inputs.t().unbind(0)

    flux = F.pow(2) / (S + tiny)
    coherence = torch.sin(E) + A * B
    emergence = C * (flux + coherence)

    # Assemble the targets column-wise and express them in normalized units.
    targets = torch.stack((flux, coherence, emergence), dim=1)
    targets = (targets - stats['Y_mean']) / stats['Y_std']

    criterion = nn.MSELoss()
    return criterion(pred, targets)

# -----------------------------------------------------------------------------
# 4. Combined Loss
# -----------------------------------------------------------------------------
def total_loss(pred, target, X, stats, lam=1.0):
    """Combine the data-fit MSE with the weighted physics residual.

    Args:
        pred: model output.
        target: normalized targets for the data-fit term.
        X: normalized inputs, forwarded to ``physics_residual``.
        stats: normalization statistics, forwarded to ``physics_residual``.
        lam: weight applied to the physics term.

    Returns:
        Tuple ``(combined, data_term, physics_term)`` where
        ``combined = data_term + lam * physics_term``.
    """
    data_term = nn.MSELoss()(pred, target)
    physics_term = physics_residual(pred, X, stats)
    combined = data_term + lam * physics_term
    return combined, data_term, physics_term

# -----------------------------------------------------------------------------
# 5. MC‐Dropout for Uncertainty
# -----------------------------------------------------------------------------
def mc_dropout_predict(model, X, T=50):
    """Estimate predictive mean and spread with Monte-Carlo dropout.

    Runs ``T`` stochastic forward passes with the model in training mode (so
    dropout stays active) and without gradient tracking.  The training/eval
    mode of every submodule is restored afterwards, so the call leaves the
    model as the caller configured it.

    Args:
        model: the network to sample from.
        X: input batch passed unchanged to ``model`` on every pass.
        T: number of stochastic forward passes.

    Returns:
        Tuple ``(mean, std)`` computed over the ``T`` passes.
    """
    # Remember each submodule's mode individually; a model may mix modes.
    previous_modes = [(module, module.training) for module in model.modules()]
    model.train()  # keep dropout on
    try:
        with torch.no_grad():
            stacked = torch.stack([model(X) for _ in range(T)])
    finally:
        # Undo the mode switch even if a forward pass raised.
        for module, was_training in previous_modes:
            module.training = was_training
    return stacked.mean(dim=0), stacked.std(dim=0)

# -----------------------------------------------------------------------------
# 6. Training Loop
# -----------------------------------------------------------------------------
def train(model, train_dl, val_dl, stats, device,
          lr=1e-3, wd=1e-5, lam=1.0,
          epochs=100, patience=10,
          ckpt_path="best_post_intelligence_ai.pth"):
    """Train ``model`` with AdamW, LR scheduling, gradient clipping and early stopping.

    After every epoch the validation loss drives a ``ReduceLROnPlateau``
    scheduler and the checkpoint/early-stopping logic.  When training ends the
    best checkpoint written *during this call* is loaded back into ``model``.
    If no epoch ever improved (for example the loss was NaN or ``epochs`` is 0)
    the final weights are kept, so a checkpoint left behind by an earlier run
    is never loaded by accident.

    Args:
        model: network to optimise; moved to ``device``.
        train_dl: loader yielding ``(X, Y)`` training batches.
        val_dl: loader yielding ``(X, Y)`` validation batches.
        stats: normalization statistics forwarded to ``total_loss``.
        device: device on which batches and the model are placed.
        lr: AdamW learning rate.
        wd: AdamW weight decay.
        lam: weight of the physics term in ``total_loss``.
        epochs: maximum number of epochs.
        patience: epochs without improvement tolerated before stopping.
        ckpt_path: file the best ``state_dict`` is saved to.

    Returns:
        Dict with ``'train'`` and ``'val'`` lists of per-epoch mean losses.
    """
    model.to(device)
    opt       = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(opt, 'min', factor=0.5, patience=5)

    best_val = float('inf')
    wait     = 0
    saved    = False  # becomes True once this run has written a checkpoint
    history  = {'train': [], 'val': []}

    for epoch in range(1, epochs+1):
        # Train step
        model.train()
        train_loss = 0.0
        for Xb, Yb in train_dl:
            Xb, Yb = Xb.to(device), Yb.to(device)
            pred = model(Xb)
            loss, _, _ = total_loss(pred, Yb, Xb, stats, lam)

            opt.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            opt.step()
            # Weight by batch size so the epoch figure is a per-sample mean.
            train_loss += loss.item() * Xb.size(0)
        train_loss /= len(train_dl.dataset)

        # Validation step
        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for Xv, Yv in val_dl:
                Xv, Yv = Xv.to(device), Yv.to(device)
                pred = model(Xv)
                loss, _, _ = total_loss(pred, Yv, Xv, stats, lam)
                val_loss += loss.item() * Xv.size(0)
        val_loss /= len(val_dl.dataset)

        scheduler.step(val_loss)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        print(f"Epoch {epoch:03d} | Train {train_loss:.4e} | Val {val_loss:.4e}")

        # Checkpoint on a meaningful improvement, otherwise count towards patience.
        if val_loss < best_val - 1e-6:
            best_val = val_loss
            wait = 0
            torch.save(model.state_dict(), ckpt_path)
            saved = True
        else:
            wait += 1
            if wait >= patience:
                print("Early stopping.")
                break

    # Load best weights, but only a checkpoint produced by this run.
    if saved:
        model.load_state_dict(torch.load(ckpt_path, map_location=device))
    else:
        print("No checkpoint was written during this run; keeping final weights.")
    return history

# -----------------------------------------------------------------------------
# 7. Plots
# -----------------------------------------------------------------------------
def plot_history(hist):
    """Draw the per-epoch training and validation loss curves stored in ``hist``.

    Args:
        hist: mapping with ``'train'`` and ``'val'`` sequences of losses.
    """
    plt.figure()
    for key, label in (('train', 'Train'), ('val', 'Val')):
        plt.plot(hist[key], label=label)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.legend()
    plt.show()

def plot_parity(model, ds, stats, device):
    """Scatter de-normalized predictions against de-normalized targets.

    All target columns are flattened into a single scatter plot, with a dashed
    red identity line spanning the range of the true values.

    Args:
        model: trained network; switched to eval mode.
        ds: dataset exposing ``X``, ``Y``, ``Y_mean`` and ``Y_std``.
        stats: unused; accepted for signature parity with the other helpers.
        device: device on which the forward pass runs.
    """
    model.eval()
    inputs = torch.from_numpy(ds.X).to(device)
    with torch.no_grad():
        pred_norm = model(inputs).cpu().numpy()

    def to_physical(values):
        # Undo the target standardization.
        return values * ds.Y_std + ds.Y_mean

    truth = to_physical(ds.Y)
    predicted = to_physical(pred_norm)
    lo, hi = truth.min(), truth.max()

    plt.figure(figsize=(6,6))
    plt.scatter(truth.ravel(), predicted.ravel(), s=4, alpha=0.5)
    plt.plot([lo, hi], [lo, hi], 'r--')
    plt.xlabel("True")
    plt.ylabel("Pred")
    plt.show()

def plot_unc(model, ds, stats, device):
    """Plot a histogram of the MC-dropout standard deviation of the first output.

    The spread comes from 50 stochastic forward passes over every sample in
    ``ds`` and is in the units of the model output.

    Args:
        model: network to sample from.
        ds: dataset exposing the input array ``X``.
        stats: unused; accepted for signature parity with the other helpers.
        device: device on which the forward passes run.
    """
    X = torch.from_numpy(ds.X).to(device)
    _, std = mc_dropout_predict(model, X, T=50)
    u = std[:,0].cpu().numpy()  # first output column only

    # Open a dedicated figure, like the other plot helpers, so the histogram
    # is never drawn on top of whatever figure happens to be current.
    plt.figure()
    plt.hist(u, bins=30, color='teal')
    plt.xlabel('Prediction Stddev')
    plt.show()

# -----------------------------------------------------------------------------
# 8. Main
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    # Use the GPU when one is available.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Build the dataset and mirror its normalization statistics as tensors.
    ds = PostIntelligenceDataset(n_samples=5000)
    stats = {
        key: torch.tensor(getattr(ds, key), dtype=torch.float32, device=device)
        for key in ('X_mean', 'X_std', 'Y_mean', 'Y_std')
    }

    # 80/20 train/validation split.
    val_count = int(len(ds) * 0.2)
    tr_ds, va_ds = random_split(ds, [len(ds) - val_count, val_count])
    tr_dl = DataLoader(tr_ds, batch_size=128, shuffle=True)
    va_dl = DataLoader(va_ds, batch_size=256, shuffle=False)

    # Fit the model; ``train`` returns the per-epoch loss history.
    model = PostIntelligenceAI()
    history = train(model, tr_dl, va_dl, stats, device)

    # Diagnostics: loss curves, parity plot, uncertainty histogram.
    plot_history(history)
    plot_parity(model, ds, stats, device)
    plot_unc(model, ds, stats, device)