<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/train_self_evolving_ai_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
"""
train_self_evolving_ai.py

Physics‐informed AI pipeline for SelfEvolvingAI:

1. Synthetic dataset of 6 cognitive metrics → 3 intelligence metrics
2. Float32 normalization and dtype consistency
3. SelfEvolvingAI MLP with adaptive layer & Dropout
4. Physics‐informed residual enforcing toy “intelligence” laws
5. MC‐Dropout inference for uncertainty quantification
6. Training loop with AdamW, ReduceLROnPlateau, gradient clipping, NaN checks, early stopping
7. Safe checkpoint load
8. Visualizations: loss curves, scatter plots, uncertainty heatmap
"""

import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
import matplotlib.pyplot as plt

# ------------------------------------------------------------------------------
# 1. Synthetic “Self‐Evolving” Dataset
# ------------------------------------------------------------------------------
class SelfEvolvingDataset(Dataset):
    """Synthetic dataset mapping 6 cognitive features to 3 intelligence targets.

    Feature columns of ``X`` (each drawn uniformly from [0, 1)), in order:
    CL (cognitive load), EO (energy optimization), KR (knowledge expansion),
    MC (memory capacity), SP (sensory processing), NF (novelty factor).

    Target columns of ``Y``, in order:
        IGR = KR * EO / (CL + 1e-6)   (intelligence growth rate)
        AF  = NF * MC * SP            (adaptation factor)
        CE  = EO / (1 + CL)           (computational efficiency)
    Gaussian noise with a standard deviation of 1% of each target's standard
    deviation is added to the targets.

    Both ``X`` and ``Y`` are standardized per column and stored as float32.
    The float64 statistics used for that are kept in ``X_mean``, ``X_std``,
    ``Y_mean`` and ``Y_std`` so callers can de-normalize.

    Args:
        n_samples: Number of rows to generate (must be >= 1).
        seed: Seed for the private random generator.

    Raises:
        ValueError: If ``n_samples`` is smaller than 1.
    """

    def __init__(self, n_samples=6000, seed=0):
        if n_samples < 1:
            raise ValueError(f"n_samples must be >= 1, got {n_samples}")

        # A private RandomState yields exactly the same stream as
        # np.random.seed(seed) followed by np.random.rand/randn, but does not
        # reseed the process-wide NumPy generator as a side effect.
        rng = np.random.RandomState(seed)

        # Features in [0,1); drawn one column at a time, in this order
        CL, EO, KR, MC, SP, NF = (rng.rand(n_samples, 1) for _ in range(6))

        X_raw = np.hstack([CL, EO, KR, MC, SP, NF]).astype(np.float64)

        # Toy intelligence-laws for targets
        eps = 1e-6  # keeps IGR finite when CL is (almost) zero
        IGR = KR * EO / (CL + eps)
        AF = NF * MC * SP
        CE = EO / (1.0 + CL)

        Y_raw = np.hstack([IGR, AF, CE]).astype(np.float64)
        # add 1% relative noise (std is taken from the noise-free targets)
        Y_raw += 0.01 * Y_raw.std(axis=0) * rng.randn(*Y_raw.shape)

        # Normalization stats; the small offset avoids division by zero
        self.X_mean, self.X_std = X_raw.mean(0), X_raw.std(0) + 1e-8
        self.Y_mean, self.Y_std = Y_raw.mean(0), Y_raw.std(0) + 1e-8

        # Standardize to float32
        self.X = ((X_raw - self.X_mean) / self.X_std).astype(np.float32)
        self.Y = ((Y_raw - self.Y_mean) / self.Y_std).astype(np.float32)

        print(f"X range: {self.X.min():.3f}–{self.X.max():.3f}")
        print(f"Y range: {self.Y.min():.3f}–{self.Y.max():.3f}")

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(features, targets)`` pair at ``idx`` as torch tensors."""
        return torch.from_numpy(self.X[idx]), torch.from_numpy(self.Y[idx])

# ------------------------------------------------------------------------------
# 2. SelfEvolvingAI Model
# ------------------------------------------------------------------------------
class SelfEvolvingAI(nn.Module):
    """Two-hidden-layer MLP with ReLU + Dropout and a linear output head.

    Args:
        input_dim: Size of each input feature vector.
        hidden_dim: Width of both hidden layers.
        output_dim: Size of each output vector.
        p_drop: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_dim=6, hidden_dim=32, output_dim=3, p_drop=0.1):
        super().__init__()
        # Sub-module names define the state_dict keys used by checkpoints.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.drop = nn.Dropout(p_drop)
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.adaptive_layer = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a batch of inputs to a batch of ``output_dim`` predictions."""
        hidden = x
        # Each hidden stage is: linear -> ReLU -> dropout
        for layer in (self.fc1, self.fc2):
            hidden = self.drop(self.relu(layer(hidden)))
        return self.adaptive_layer(hidden)

# ------------------------------------------------------------------------------
# 3. Physics‐informed Residual and Loss
# ------------------------------------------------------------------------------
def physics_residual(pred, X, stats, eps=1e-4):
    """Mean-squared error between ``pred`` and the closed-form toy laws.

    The normalized inputs are de-normalized, the three target laws are
    evaluated on them, the result is re-normalized with the target
    statistics, and the MSE against ``pred`` is returned.

    Args:
        pred: Normalized predictions with columns (IGR, AF, CE).
        X: Normalized inputs with columns (CL, EO, KR, MC, SP, NF).
        stats: Mapping with tensors under the keys ``'X_mean'``, ``'X_std'``,
            ``'Y_mean'`` and ``'Y_std'``.
        eps: Offset added to CL in the IGR denominator, which is also
            clamped from below at ``eps``. The default (1e-4) is larger than
            the 1e-6 used by ``SelfEvolvingDataset``; pass ``eps=1e-6`` to
            evaluate exactly the law the data was generated with.

    Returns:
        Scalar tensor holding the mean squared residual.
    """
    # Denormalize
    X_den = X * stats['X_std'] + stats['X_mean']
    CL, EO, KR, MC, SP, NF = X_den.unbind(dim=1)

    # True targets; the clamp guards against a de-normalized CL that ends up
    # slightly negative and would otherwise shrink or flip the denominator.
    IGR_t = KR * EO / torch.clamp(CL + eps, min=eps)
    AF_t = NF * MC * SP
    CE_t = EO / (1.0 + CL)

    Yt = torch.stack([IGR_t, AF_t, CE_t], dim=1)
    Yt_norm = (Yt - stats['Y_mean']) / stats['Y_std']
    return nn.functional.mse_loss(pred, Yt_norm)

def total_loss(pred, true, X, stats, lam=1.0):
    """Combine the data-fit MSE with the weighted physics residual.

    Args:
        pred: Model predictions.
        true: Reference targets compared against ``pred``.
        X: Inputs forwarded to ``physics_residual``.
        stats: Normalization statistics forwarded to ``physics_residual``.
        lam: Weight of the physics term.

    Returns:
        Tuple ``(data_mse + lam * physics, data_mse, physics)``.
    """
    data_term = nn.functional.mse_loss(pred, true)
    physics_term = physics_residual(pred, X, stats)
    combined = data_term + lam * physics_term
    return combined, data_term, physics_term

# ------------------------------------------------------------------------------
# 4. MC‐Dropout Uncertainty Quantification
# ------------------------------------------------------------------------------
def mc_dropout_predict(model, X, T=50):
    """Estimate predictive mean and spread with Monte-Carlo dropout.

    The model is run ``T`` times on ``X`` in training mode (so dropout stays
    stochastic) with gradients disabled. Every sub-module's previous
    train/eval flag is restored afterwards, so calling this helper does not
    silently leave the caller's model in training mode.

    Args:
        model: Module to sample from; it should contain dropout layers.
        X: Input batch passed unchanged to ``model`` on every pass.
        T: Number of stochastic forward passes. With ``T=1`` the sample
            standard deviation is undefined and comes back as NaN.

    Returns:
        Tuple ``(mean, std)`` taken over the ``T`` passes; each has the shape
        of a single model output.
    """
    # Remember the mode of every sub-module so it can be put back exactly.
    previous_modes = [(module, module.training) for module in model.modules()]
    model.train()
    try:
        with torch.no_grad():
            samples = torch.stack([model(X) for _ in range(T)], dim=0)
    finally:
        for module, was_training in previous_modes:
            module.training = was_training
    return samples.mean(0), samples.std(0)

# ------------------------------------------------------------------------------
# 5. Training Loop
# ------------------------------------------------------------------------------
def train(model, train_loader, val_loader, stats, device,
          lr=1e-4, wd=1e-5, lam=1.0, epochs=100, patience=10,
          ckpt_path="best_selfevolving.pth"):
    """Train ``model`` with AdamW, LR-on-plateau scheduling and early stopping.

    Each epoch minimizes ``total_loss`` on ``train_loader`` (gradient norm
    clipped to 1.0) and evaluates it on ``val_loader``. Whenever the
    validation loss improves, the weights are saved to ``ckpt_path``. When
    training ends (all epochs done, early stop, or a non-finite loss), the
    best weights saved during this call are loaded back and the model is put
    in eval mode.

    Args:
        model: Network to optimize; moved to ``device``.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        stats: Normalization statistics forwarded to ``total_loss``.
        device: Device the model and every batch are moved to.
        lr: AdamW learning rate.
        wd: AdamW weight decay.
        lam: Physics-term weight forwarded to ``total_loss``.
        epochs: Maximum number of epochs.
        patience: Epochs without validation improvement before stopping.
        ckpt_path: File the best weights are written to and restored from.

    Returns:
        Dict with per-epoch loss lists under the keys ``'train'`` and
        ``'val'``.
    """
    model.to(device)
    opt = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    sched = optim.lr_scheduler.ReduceLROnPlateau(opt, 'min', factor=0.5, patience=5)

    best_val, wait = float('inf'), 0
    history = {'train': [], 'val': []}
    saved_this_run = False  # never restore a stale file from an earlier run
    diverged = False

    for ep in range(1, epochs+1):
        # — Training —
        model.train()
        run, seen = 0.0, 0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb)
            loss, _, _ = total_loss(pred, yb, xb, stats, lam)

            # isfinite catches +/-inf as well as NaN
            if not torch.isfinite(loss):
                print(f"Non-finite loss at epoch {ep}, aborting.")
                diverged = True
                break

            opt.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            opt.step()
            run += loss.item() * xb.size(0)
            seen += xb.size(0)
        if diverged:
            # Fall through to the checkpoint restore instead of returning a
            # model that just produced a non-finite loss.
            break
        # Average over the samples actually seen, which stays correct when
        # the loader drops the last batch or uses a sampler.
        train_loss = run / max(seen, 1)

        # — Validation —
        model.eval()
        run, seen = 0.0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                pred = model(xb)
                loss, _, _ = total_loss(pred, yb, xb, stats, lam)
                run += loss.item() * xb.size(0)
                seen += xb.size(0)
        val_loss = run / max(seen, 1)

        sched.step(val_loss)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        print(f"Epoch {ep:03d} | Train {train_loss:.4e} | Val {val_loss:.4e}")

        # Checkpoint & early stop
        if val_loss < best_val - 1e-8:
            best_val, wait = val_loss, 0
            torch.save(model.state_dict(), ckpt_path)
            saved_this_run = True
        else:
            wait += 1
            if wait >= patience:
                print("Early stopping.")
                break

    # Safe load: only restore weights written during this call
    if saved_this_run and os.path.exists(ckpt_path):
        model.load_state_dict(torch.load(ckpt_path, map_location=device))
    else:
        print("No checkpoint found; using last model.")

    # Hand the model back in eval mode on every exit path
    model.eval()
    return history

# ------------------------------------------------------------------------------
# 6. Visualization Helpers
# ------------------------------------------------------------------------------
def plot_history(hist):
    """Plot the training and validation loss curves and show the figure.

    Args:
        hist: Mapping with per-epoch loss sequences under the keys
            ``'train'`` and ``'val'``.
    """
    curves = (('train', 'Train'), ('val', 'Val'))
    for key, legend_label in curves:
        plt.plot(hist[key], label=legend_label)
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

def plot_scatter(y_true, y_pred, name):
    """Scatter predictions against true values with a dashed identity line.

    Args:
        y_true: True values (x-axis); must provide ``min()`` and ``max()``.
        y_pred: Predicted values (y-axis).
        name: Title of the figure.
    """
    plt.scatter(y_true, y_pred, s=5, alpha=0.6)
    # The identity line spans the range of the true values
    lo = y_true.min()
    hi = y_true.max()
    diagonal = [lo, hi]
    plt.plot(diagonal, diagonal, 'r--')
    plt.title(name)
    plt.xlabel("True")
    plt.ylabel("Pred")
    plt.show()

def plot_uncertainty(model, stats, device):
    """Show a heatmap of MC-dropout uncertainty for the first model output.

    Cognitive load (column 0) and energy optimization (column 1) are swept
    over a 100 x 100 grid on [0, 1]; the remaining four features are held at
    their mean. The standard deviation of output column 0 over 100
    stochastic passes is drawn as a colour mesh.

    Args:
        model: Model handed to ``mc_dropout_predict``.
        stats: Mapping with ``'X_mean'`` and ``'X_std'`` tensors used to
            normalize the grid.
        device: Device on which the grid is built.
    """
    # vary CL vs EO for IGR uncertainty
    load_axis = np.linspace(0, 1, 100, dtype=np.float32)
    energy_axis = np.linspace(0, 1, 100, dtype=np.float32)
    CM, EM = np.meshgrid(load_axis, energy_axis)
    n_points = CM.size

    grid = torch.zeros((n_points, 6), device=device)
    # Columns 0 and 1 carry the swept features
    for col, mesh in enumerate((CM, EM)):
        grid[:, col] = torch.from_numpy(mesh.ravel()).to(device)
    # Remaining features stay fixed at their mean value
    for col in range(2, 6):
        grid[:, col] = stats['X_mean'][col]

    Xn = (grid - stats['X_mean']) / stats['X_std']
    _, spread = mc_dropout_predict(model, Xn, T=100)
    U = spread[:, 0].cpu().numpy().reshape(CM.shape)

    plt.pcolormesh(CM, EM, U, cmap='magma')
    plt.colorbar(label="Std IGR")
    plt.xlabel("Cognitive Load")
    plt.ylabel("Energy Optimization")
    plt.title("Uncertainty Heatmap: Intelligence Growth")
    plt.show()

# ------------------------------------------------------------------------------
# 7. Main Execution
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Build the dataset and mirror its normalization stats as float32 tensors
    # on the training device.
    ds = SelfEvolvingDataset(n_samples=6000)
    stats = {
        'X_mean': torch.tensor(ds.X_mean, dtype=torch.float32, device=device),
        'X_std' : torch.tensor(ds.X_std,  dtype=torch.float32, device=device),
        'Y_mean': torch.tensor(ds.Y_mean, dtype=torch.float32, device=device),
        'Y_std' : torch.tensor(ds.Y_std,  dtype=torch.float32, device=device),
    }

    # 80/20 train/validation split
    n_val = int(0.2 * len(ds))
    tr_ds, va_ds = random_split(ds, [len(ds)-n_val, n_val])
    tr_ld = DataLoader(tr_ds, batch_size=128, shuffle=True)
    va_ld = DataLoader(va_ds, batch_size=256)

    model = SelfEvolvingAI(input_dim=6, hidden_dim=32, output_dim=3, p_drop=0.1).to(device)
    history = train(model, tr_ld, va_ld, stats, device,
                    lr=1e-4, wd=1e-5, lam=1.0, epochs=100, patience=10)

    plot_history(history)

    # Make sure dropout is disabled for the deterministic predictions below,
    # regardless of the mode train() left the model in.
    model.eval()

    X_all = torch.from_numpy(ds.X).to(device)
    with torch.no_grad():
        Yp_norm = model(X_all).cpu().numpy()
    # Back to physical units for plotting
    Yp = Yp_norm * ds.Y_std + ds.Y_mean
    Yt = ds.Y * ds.Y_std + ds.Y_mean
    names = ["Intelligence Growth", "Adaptation Factor", "Comp. Efficiency"]
    for i, nm in enumerate(names):
        plot_scatter(Yt[:,i], Yp[:,i], nm)

    plot_uncertainty(model, stats, device)