<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/train_thoughtless_ai_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
"""
train_thoughtless_ai.py

Full pipeline for ThoughtlessAI:
1. Synthetic “thoughtless” dataset of 6 inputs → 3 targets
2. Float32 normalization
3. MLP with LayerNorm, Dropout & ReLU
4. Theory‐informed residual enforcing toy “meta‐intelligence” laws
5. MC‐Dropout for uncertainty quantification
6. Training loop with AdamW, ReduceLROnPlateau, gradient clipping, NaN checks, early stopping
7. Safe checkpointing and reload
8. Visualizations: loss curves, scatter plots, uncertainty heatmap
"""

import os
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, random_split

# ------------------------------------------------------------------------------
# 1. Synthetic Thoughtless Dataset
# ------------------------------------------------------------------------------
class ThoughtlessDataset(Dataset):
    """Synthetic regression dataset mapping 6 inputs to 3 toy targets.

    Input columns of ``X`` (in order):
        NLI        ~ U[0, 1)     non-logical intelligence factor
        NCC        ~ U[0, 5)     non-causal computation measure
        HPC1..HPC4 ~ U[-pi, pi)  hyper-consciousness parameters

    Target columns of ``Y`` (in order):
        IC  = NLI * tanh(NCC + 1e-6)
        MAB = sin(HPC1 * HPC2) / (1 + |HPC3|)
        HCE = cos(HPC4) * NLI

    Gaussian noise whose standard deviation is 1% of each target column's
    standard deviation is added to the targets.  Inputs and noisy targets
    are then standardized column-wise and stored as float32 arrays.

    Attributes:
        X_mean, X_std: per-column float64 statistics of the raw inputs.
        Y_mean, Y_std: per-column float64 statistics of the noisy targets.
            Both ``*_std`` arrays include a 1e-8 offset so the division
            below can never hit zero.
        X: standardized inputs, float32, shape ``(n_samples, 6)``.
        Y: standardized targets, float32, shape ``(n_samples, 3)``.
    """

    def __init__(self, n_samples=5000, seed=42):
        """Generate the dataset.

        Args:
            n_samples: number of rows to generate; must be at least 1.
            seed: seed for the private random generator.

        Raises:
            ValueError: if ``n_samples`` is smaller than 1 (the column
                statistics would be NaN).
        """
        if n_samples < 1:
            raise ValueError(f"n_samples must be >= 1, got {n_samples}")

        # A private RandomState yields the same stream that
        # np.random.seed(seed) + np.random.* would, but leaves the
        # process-wide NumPy RNG untouched for the rest of the program.
        rng = np.random.RandomState(seed)

        # Inputs:
        # NLI: non‐logical intelligence factor ∈ [0,1]
        # NCC: non‐causal computation measure ∈ [0,5]
        # HPC1–HPC4: hyper‐consciousness params ∈ [−π,π]
        NLI   = rng.rand(n_samples, 1)
        NCC   = rng.uniform(0, 5.0, (n_samples, 1))
        HPC1  = rng.uniform(-np.pi, np.pi, (n_samples, 1))
        HPC2  = rng.uniform(-np.pi, np.pi, (n_samples, 1))
        HPC3  = rng.uniform(-np.pi, np.pi, (n_samples, 1))
        HPC4  = rng.uniform(-np.pi, np.pi, (n_samples, 1))

        X_raw = np.hstack([NLI, NCC, HPC1, HPC2, HPC3, HPC4]).astype(np.float64)

        # Toy meta‐intelligence targets:
        # IC  = NLI * tanh(NCC + ε)
        # MAB = sin(HPC1 * HPC2) / (1 + abs(HPC3))
        # HCE = cos(HPC4) * NLI
        eps = 1e-6
        IC  = NLI * np.tanh(NCC + eps)
        MAB = np.sin(HPC1 * HPC2) / (1 + np.abs(HPC3))
        HCE = np.cos(HPC4) * NLI

        Y_raw = np.hstack([IC, MAB, HCE]).astype(np.float64)
        # add 1% relative noise (relative to each target column's std)
        Y_raw += 0.01 * Y_raw.std(axis=0) * rng.randn(*Y_raw.shape)

        # Stats for normalization; kept so callers can de-normalize later.
        self.X_mean = X_raw.mean(axis=0)
        self.X_std  = X_raw.std(axis=0) + 1e-8
        self.Y_mean = Y_raw.mean(axis=0)
        self.Y_std  = Y_raw.std(axis=0) + 1e-8

        # Normalize to float32
        self.X = ((X_raw - self.X_mean) / self.X_std).astype(np.float32)
        self.Y = ((Y_raw - self.Y_mean) / self.Y_std).astype(np.float32)

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(x, y)`` tensors for row ``idx``.

        The tensors are created with ``torch.from_numpy`` and therefore
        share memory with the underlying arrays.
        """
        return (
            torch.from_numpy(self.X[idx]),
            torch.from_numpy(self.Y[idx])
        )

# ------------------------------------------------------------------------------
# 2. ThoughtlessAI Model
# ------------------------------------------------------------------------------
class ThoughtlessAI(nn.Module):
    """Fully connected regressor.

    Every hidden width in ``hidden_dims`` contributes the sequence
    Linear -> LayerNorm -> ReLU -> Dropout(p_drop); a final Linear layer
    maps the last width to ``output_dim``.  The whole stack is stored as
    ``self.net`` (an ``nn.Sequential``).
    """

    def __init__(self, input_dim=6, hidden_dims=(64,64), output_dim=3, p_drop=0.1):
        super().__init__()
        # Consecutive (fan_in, fan_out) pairs describe the hidden stages.
        widths = [input_dim, *hidden_dims]
        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(nn.LayerNorm(fan_out))
            stack.append(nn.ReLU())
            stack.append(nn.Dropout(p_drop))
        # Output head reads from the last hidden width (or the input when
        # no hidden layers were requested).
        stack.append(nn.Linear(widths[-1], output_dim))
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        return self.net(x)

# ------------------------------------------------------------------------------
# 3. Theory‐Informed Residual Loss
# ------------------------------------------------------------------------------
def theory_residual(pred, X, stats):
    """Mean-squared error between ``pred`` and the closed-form targets.

    ``X`` is de-normalized with ``stats['X_std']`` / ``stats['X_mean']``,
    the three analytic target formulas are evaluated on its six columns,
    the result is normalized with ``stats['Y_mean']`` / ``stats['Y_std']``
    and compared against ``pred``.

    Args:
        pred: predictions in normalized target space; compared element-wise
            against a ``(rows, 3)`` tensor.
        X: normalized inputs; must be 2-D with six columns, since it is
            split column-wise into six variables.
        stats: mapping with tensor entries 'X_mean', 'X_std', 'Y_mean'
            and 'Y_std'.

    Returns:
        Scalar tensor holding the mean squared residual.
    """
    raw_inputs = X * stats['X_std'] + stats['X_mean']
    # One 1-D tensor per input column.
    nli, ncc, h1, h2, h3, h4 = raw_inputs.unbind(dim=1)
    tiny = 1e-6

    coherence = nli * torch.tanh(ncc + tiny)
    balance = torch.sin(h1 * h2) / (1 + torch.abs(h3))
    evolution = torch.cos(h4) * nli

    analytic = torch.stack((coherence, balance, evolution), dim=1)
    analytic_norm = (analytic - stats['Y_mean']) / stats['Y_std']
    return nn.functional.mse_loss(pred, analytic_norm)

def total_loss(pred, true, X, stats, lam=1.0):
    """Combine the data-fit loss with the theory residual.

    Args:
        pred: model predictions.
        true: reference targets compared against ``pred`` with MSE.
        X: inputs forwarded to ``theory_residual``.
        stats: normalization statistics forwarded to ``theory_residual``.
        lam: weight applied to the theory residual.

    Returns:
        Tuple ``(data_mse + lam * residual, data_mse, residual)``.
    """
    data_term = nn.functional.mse_loss(pred, true)
    theory_term = theory_residual(pred, X, stats)
    combined = data_term + lam * theory_term
    return combined, data_term, theory_term

# ------------------------------------------------------------------------------
# 4. MC‐Dropout Uncertainty Quantification
# ------------------------------------------------------------------------------
def mc_dropout_predict(model, X, T=50):
    """Estimate predictive mean and spread with Monte-Carlo dropout.

    The model is switched to training mode so that dropout stays active,
    ``T`` stochastic forward passes are run without gradient tracking, and
    every module's original ``training`` flag is restored afterwards (even
    if a forward pass raises), so the caller's train/eval state is left
    exactly as it was.

    Args:
        model: an ``nn.Module``; dropout layers provide the stochasticity.
        X: input batch passed unchanged to ``model``.
        T: number of stochastic forward passes; must be at least 1.
            With ``T == 1`` the sample standard deviation is not defined
            and comes back as NaN.

    Returns:
        Tuple ``(mean, std)`` taken over the ``T`` passes, each with the
        shape of a single model output.

    Raises:
        ValueError: if ``T`` is smaller than 1.
    """
    if T < 1:
        raise ValueError(f"T must be >= 1, got {T}")

    # Remember per-module flags: sub-modules may be in mixed modes, which a
    # single model.train(flag) call could not restore faithfully.
    saved_modes = [(module, module.training) for module in model.modules()]
    model.train()
    try:
        with torch.no_grad():
            stacked = torch.stack([model(X) for _ in range(T)], dim=0)
    finally:
        for module, was_training in saved_modes:
            module.training = was_training
    return stacked.mean(0), stacked.std(0)

# ------------------------------------------------------------------------------
# 5. Training Loop with Safety & Checkpointing
# ------------------------------------------------------------------------------
def train(model, train_loader, val_loader, stats, device,
          lr=1e-4, wd=1e-5, lam=1.0, epochs=100, patience=10,
          ckpt_path="best_thoughtless_ai.pth"):
    """Train ``model`` with AdamW, LR-on-plateau scheduling and early stopping.

    Each epoch runs one pass over ``train_loader`` (gradient norm clipped
    to 1.0) and one evaluation pass over ``val_loader``.  Whenever the
    validation loss improves by more than 1e-6 the weights are written to
    ``ckpt_path``.  Training stops after ``patience`` epochs without
    improvement, after ``epochs`` epochs, or as soon as a training batch
    produces a non-finite loss.  In every case the best checkpoint written
    during *this* call (if any) is loaded back into ``model`` before
    returning; a pre-existing file at ``ckpt_path`` from an earlier run is
    never loaded.

    Args:
        model: network to optimize; moved to ``device``.
        train_loader: loader yielding ``(inputs, targets)`` training batches.
        val_loader: loader yielding ``(inputs, targets)`` validation batches.
        stats: normalization statistics forwarded to ``total_loss``.
        device: device on which batches and the model are placed.
        lr: AdamW learning rate.
        wd: AdamW weight decay.
        lam: weight of the theory residual inside ``total_loss``.
        epochs: maximum number of epochs.
        patience: early-stopping patience in epochs.
        ckpt_path: file used to store the best weights.

    Returns:
        Dict with lists 'train' and 'val' holding the per-epoch mean losses
        of every completed epoch.
    """
    model.to(device)
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=wd)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode='min', factor=0.5, patience=5
    )

    best_val, wait = float('inf'), 0
    history = {'train': [], 'val': []}
    saved_this_run = False  # guards against reloading a stale checkpoint
    diverged = False

    for ep in range(1, epochs+1):
        # Training step
        model.train()
        run = 0.0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            pred = model(xb)
            loss, _, _ = total_loss(pred, yb, xb, stats, lam)
            # Catch inf as well as NaN *before* backward so a bad batch
            # cannot push non-finite gradients into the weights.
            if not torch.isfinite(loss):
                print(f"Non-finite loss at epoch {ep}, abort.")
                diverged = True
                break
            optimizer.zero_grad()
            loss.backward()
            nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()
            # Weight by batch size so the epoch value is a per-sample mean.
            run += loss.item() * xb.size(0)
        if diverged:
            # Skip validation/bookkeeping; fall through to the reload below.
            break
        train_loss = run / len(train_loader.dataset)

        # Validation step
        model.eval()
        run = 0.0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                pred = model(xb)
                l, _, _ = total_loss(pred, yb, xb, stats, lam)
                run += l.item() * xb.size(0)
        val_loss = run / len(val_loader.dataset)

        scheduler.step(val_loss)
        history['train'].append(train_loss)
        history['val'].append(val_loss)
        print(f"Epoch {ep:03d} | Train {train_loss:.4e} | Val {val_loss:.4e}")

        # Checkpoint & early stop
        if val_loss < best_val - 1e-6:
            best_val, wait = val_loss, 0
            torch.save(model.state_dict(), ckpt_path)
            saved_this_run = True
        else:
            wait += 1
            if wait >= patience:
                print("Early stopping.")
                break

    # Safe reload: only restore weights this call actually wrote.
    if saved_this_run and os.path.exists(ckpt_path):
        model.load_state_dict(
            torch.load(ckpt_path, map_location=device)
        )
    return history

# ------------------------------------------------------------------------------
# 6. Visualization Helpers
# ------------------------------------------------------------------------------
def plot_history(history):
    """Plot the training and validation loss curves in a new figure.

    Args:
        history: mapping with sequences under the keys 'train' and 'val',
            one value per epoch.
    """
    # Open a dedicated figure, like the other plotting helpers in this
    # file, so the curves never land on a figure that is still current.
    plt.figure()
    plt.plot(history['train'], label='Train')
    plt.plot(history['val'],   label='Val')
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.show()

def plot_scatter(y_true, y_pred, title):
    """Scatter predictions against references with an identity guide line.

    Args:
        y_true: reference values (x-axis); must provide ``min()``/``max()``.
        y_pred: predicted values (y-axis).
        title: figure title.
    """
    plt.figure()
    plt.scatter(y_true, y_pred, s=8, alpha=0.5)
    # Dashed red y = x line spanning the range of the reference values.
    lo = y_true.min()
    hi = y_true.max()
    diagonal = [lo, hi]
    plt.plot(diagonal, diagonal, 'r--')
    plt.title(title)
    plt.show()

def plot_uncertainty_heatmap(model, stats, device):
    """Draw a heatmap of the MC-dropout std of the first output.

    A 100 x 100 grid over NLI in [0, 1] and NCC in [0, 5] is evaluated
    while the four HPC inputs are held at their means from
    ``stats['X_mean']``.  The grid is normalized with ``stats`` and passed
    to ``mc_dropout_predict`` with ``T=100``; the standard deviation of
    output column 0 is shown with the 'magma' colormap.

    Args:
        model: network handed to ``mc_dropout_predict``.
        stats: mapping with tensor entries 'X_mean' and 'X_std'
            (six values each).
        device: device on which the grid tensor is created.
    """
    side = 100
    nli_axis = np.linspace(0, 1, side, dtype=np.float32)
    ncc_axis = np.linspace(0, 5, side, dtype=np.float32)
    nli_grid, ncc_grid = np.meshgrid(nli_axis, ncc_axis)
    n_points = side * side

    # Raw (un-normalized) inputs: columns 0/1 sweep the grid, columns 2-5
    # are pinned to the HPC means.
    grid_inputs = torch.zeros((n_points, 6), device=device, dtype=torch.float32)
    grid_inputs[:, 0] = torch.from_numpy(nli_grid.ravel()).to(device)
    grid_inputs[:, 1] = torch.from_numpy(ncc_grid.ravel()).to(device)
    hpc_means = stats['X_mean'][2:].unsqueeze(0)
    grid_inputs[:, 2:] = hpc_means.expand(n_points, 4)

    # Bring the grid into the normalized space the model consumes.
    normalized = (grid_inputs - stats['X_mean']) / stats['X_std']
    _, spread = mc_dropout_predict(model, normalized, T=100)
    ic_std = spread[:, 0].cpu().reshape(nli_grid.shape)

    plt.figure(figsize=(5,4))
    plt.pcolormesh(nli_grid, ncc_grid, ic_std, cmap='magma', shading='auto')
    plt.colorbar(label="Std(IC)")
    plt.xlabel("NLI")
    plt.ylabel("NCC")
    plt.title("Uncertainty: Intelligence Coherence")
    plt.show()

# ------------------------------------------------------------------------------
# 7. Main Execution
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    # End-to-end run: build data, train, then plot diagnostics.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Seed torch so weight init, shuffling and dropout are repeatable,
    # matching the already-seeded synthetic dataset.
    torch.manual_seed(42)

    ds = ThoughtlessDataset(n_samples=5000, seed=42)
    # Normalization statistics as float32 tensors on the training device.
    stats = {
        'X_mean': torch.tensor(ds.X_mean, dtype=torch.float32, device=device),
        'X_std' : torch.tensor(ds.X_std,  dtype=torch.float32, device=device),
        'Y_mean': torch.tensor(ds.Y_mean, dtype=torch.float32, device=device),
        'Y_std' : torch.tensor(ds.Y_std,  dtype=torch.float32, device=device),
    }

    # splits and loaders (80/20); a dedicated generator keeps the split
    # independent of any other random draws.
    n_val = int(0.2 * len(ds))
    split_gen = torch.Generator().manual_seed(42)
    tr_ds, va_ds = random_split(
        ds, [len(ds)-n_val, n_val], generator=split_gen
    )
    tr_ld = DataLoader(tr_ds, batch_size=128, shuffle=True)
    va_ld = DataLoader(va_ds, batch_size=256, shuffle=False)

    model   = ThoughtlessAI().to(device)
    history = train(model, tr_ld, va_ld, stats, device)

    plot_history(history)

    # scatter true vs. pred (de-normalized, over the full dataset)
    X_all = torch.from_numpy(ds.X).to(device)
    # Force eval mode: if training aborted mid-epoch the model would still
    # be in train mode and dropout would perturb these predictions.
    model.eval()
    with torch.no_grad():
        Yp = model(X_all).cpu().numpy() * ds.Y_std + ds.Y_mean
    Yt = ds.Y * ds.Y_std + ds.Y_mean
    names = ["Intelligence Coherence","Meta-Awareness Balance","Hyper-Conscious Evolution"]
    for i, nm in enumerate(names):
        plot_scatter(Yt[:,i], Yp[:,i], nm)

    plot_uncertainty_heatmap(model, stats, device)