<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/extended_self_recursive_agi_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
"""
extended_self_recursive_agi.py

- Synthetic train + val data
- Feed-forward net with inner/outer loops + Dropout for MC-Dropout
- Training loop + checkpointing
- Evaluation on validation set
- Visualization: scatter, residuals, reliability diagram
"""

import os
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split
from torch.optim.lr_scheduler import StepLR
from tqdm import tqdm
import matplotlib.pyplot as plt
import numpy as np
from sklearn.calibration import calibration_curve


# -----------------------------------------------------------------------------
# 1. Synthetic Regression Dataset (Train + Val)
# -----------------------------------------------------------------------------
class SyntheticRegressionDataset(Dataset):
    """Noisy linear-regression toy data, split once into train and validation.

    Features are standard-normal draws. Targets are a random linear map of
    the features plus Gaussian noise scaled by 0.1. The resulting (x, y)
    pairs are partitioned with ``random_split`` at construction time and
    exposed as ``train_set`` / ``val_set``.
    """

    def __init__(self, num_samples=2500, input_size=10, val_split=500):
        """Generate ``num_samples`` pairs and hold out ``val_split`` of them."""
        super().__init__()
        features = torch.randn(num_samples, input_size)
        weights = torch.randn(input_size, 1)
        noise = 0.1 * torch.randn(num_samples, 1)
        targets = features @ weights + noise

        # One (x, y) tuple per sample so random_split can index the pairs.
        pairs = list(zip(features, targets))
        n_train = num_samples - val_split
        self.train_set, self.val_set = random_split(pairs, [n_train, val_split])

    def get_loaders(self, batch_size=32):
        """Return ``(train_loader, val_loader)``; only the train loader shuffles."""
        plan = ((self.train_set, True), (self.val_set, False))
        return tuple(
            DataLoader(subset, batch_size=batch_size, shuffle=do_shuffle)
            for subset, do_shuffle in plan
        )


# -----------------------------------------------------------------------------
# 2. Model with Dropout & MC-Dropout Inference
# -----------------------------------------------------------------------------
class SelfRecursiveAGI(nn.Module):
    """Single-hidden-layer regressor that carries its own two optimizers.

    The network is Linear -> ReLU -> Dropout -> Linear. Two independent Adam
    optimizers are built over the *same* parameters: ``main_optimizer`` for
    the caller's outer training step and ``self_optimizer`` for the extra
    inner steps performed by :meth:`self_improve`.
    """

    def __init__(self,
                 input_size: int,
                 hidden_size: int,
                 output_size: int,
                 lr_main: float = 1e-3,
                 lr_self: float = 1e-3,
                 dropout_p: float = 0.1):
        """Build the layers and both optimizers.

        Args:
            input_size: Number of input features.
            hidden_size: Width of the hidden layer.
            output_size: Number of outputs per sample.
            lr_main: Learning rate of ``main_optimizer``.
            lr_self: Learning rate of ``self_optimizer``.
            dropout_p: Dropout probability applied after the ReLU.
        """
        super().__init__()
        self.hidden = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(dropout_p)
        )
        self.output = nn.Linear(hidden_size, output_size)

        # Outer and inner optimizers (each keeps its own Adam state).
        self.main_optimizer = optim.Adam(self.parameters(), lr=lr_main)
        self.self_optimizer = optim.Adam(self.parameters(), lr=lr_self)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Map inputs through the hidden stack and the output layer."""
        h = self.hidden(x)
        return self.output(h)

    def self_improve(self,
                     loss_fn,
                     x: torch.Tensor,
                     y: torch.Tensor,
                     steps: int = 5,
                     clip_grad_norm: float = 1.0) -> float:
        """Take ``steps`` extra gradient steps on ``(x, y)`` with ``self_optimizer``.

        Args:
            loss_fn: Callable ``loss_fn(preds, y)`` returning a scalar tensor.
            x: Input batch.
            y: Target batch.
            steps: Number of inner optimisation steps.
            clip_grad_norm: Max gradient norm applied before each step.

        Returns:
            The loss measured at the start of the final step, or ``0.0`` when
            ``steps`` is zero.
        """
        last_loss = 0.0
        for _ in range(steps):
            self.self_optimizer.zero_grad()
            preds = self(x)
            loss = loss_fn(preds, y)
            loss.backward()
            nn.utils.clip_grad_norm_(self.parameters(), clip_grad_norm)
            self.self_optimizer.step()
            last_loss = loss.item()
        return last_loss

    @torch.no_grad()
    def mc_dropout_predict(self,
                           x: torch.Tensor,
                           mc_samples: int = 50) -> "tuple[np.ndarray, np.ndarray]":
        """Estimate predictive mean and spread with MC-Dropout.

        Runs ``mc_samples`` stochastic forward passes with dropout active and
        reduces them over the sample axis.

        Args:
            x: Input batch.
            mc_samples: Number of stochastic passes; must be at least 1.

        Returns:
            ``(mean, std)`` as NumPy arrays. A trailing output dimension of
            size one is dropped; the batch dimension is kept even for a
            single-sample batch, so results can be concatenated across batches.

        Raises:
            ValueError: If ``mc_samples`` is smaller than 1.
        """
        if mc_samples < 1:
            raise ValueError("mc_samples must be at least 1")

        was_training = self.training
        self.train()  # keep dropout on for the stochastic passes
        try:
            draws = np.stack(
                [self(x).cpu().numpy() for _ in range(mc_samples)],
                axis=0,
            )  # shape: [T, *output_shape]
        finally:
            # Put the module back in the mode the caller left it in.
            self.train(was_training)

        mean = draws.mean(axis=0)
        std = draws.std(axis=0)
        # Drop only the singleton output axis; a blanket squeeze() would also
        # remove a batch axis of length one.
        if mean.ndim > 0 and mean.shape[-1] == 1:
            mean = mean.squeeze(axis=-1)
            std = std.squeeze(axis=-1)
        return mean, std


# -----------------------------------------------------------------------------
# 3. Training with Self-Improvement & Checkpoints
# -----------------------------------------------------------------------------
def train_self_recursive(
    model: SelfRecursiveAGI,
    train_loader: DataLoader,
    loss_fn,
    device: torch.device,
    epochs: int = 20,
    self_steps: int = 3,
    clip_grad_norm: float = 1.0,
    scheduler_step: int = 5,
    scheduler_gamma: float = 0.5,
    checkpoint_dir: str = 'checkpoints'):
    """Train ``model`` with an outer step plus inner self-improvement steps.

    For every batch the function takes one clipped step with
    ``model.main_optimizer`` and then calls ``model.self_improve`` on the
    same batch. A ``StepLR`` schedule is attached to ``main_optimizer`` only.
    Every ``scheduler_step`` epochs a checkpoint is written to
    ``checkpoint_dir``.

    Args:
        model: Network exposing ``main_optimizer``, ``self_optimizer`` and
            ``self_improve``.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        loss_fn: Callable ``loss_fn(preds, targets)`` returning a scalar tensor.
        device: Device the model and each batch are moved to.
        epochs: Number of passes over ``train_loader``.
        self_steps: Inner steps per batch passed to ``self_improve``.
        clip_grad_norm: Max gradient norm for outer and inner steps.
        scheduler_step: ``StepLR`` step size; also the checkpoint interval.
        scheduler_gamma: ``StepLR`` decay factor.
        checkpoint_dir: Directory for checkpoint files (created if missing).
    """
    os.makedirs(checkpoint_dir, exist_ok=True)
    model.to(device)
    scheduler = StepLR(model.main_optimizer,
                       step_size=scheduler_step,
                       gamma=scheduler_gamma)

    for epoch in range(1, epochs + 1):
        model.train()
        running_loss = 0.0
        # Count batches as they arrive instead of calling len(train_loader):
        # that works for loaders without a length and avoids dividing by zero
        # when the loader is empty.
        num_batches = 0
        pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{epochs}")
        for Xb, yb in pbar:
            Xb, yb = Xb.to(device), yb.to(device)

            # Outer loop
            model.main_optimizer.zero_grad()
            out = model(Xb)
            loss_main = loss_fn(out, yb)
            loss_main.backward()
            nn.utils.clip_grad_norm_(model.parameters(), clip_grad_norm)
            model.main_optimizer.step()

            # Inner loop self-improve
            loss_self = model.self_improve(loss_fn, Xb, yb,
                                           steps=self_steps,
                                           clip_grad_norm=clip_grad_norm)

            main_value = loss_main.item()
            running_loss += main_value
            num_batches += 1
            pbar.set_postfix({
                'main': f"{main_value:.4f}",
                'self': f"{loss_self:.4f}"
            })

        scheduler.step()
        avg = running_loss / num_batches if num_batches else float('nan')
        print(f"Epoch {epoch} done, Avg Loss: {avg:.6f}")

        if epoch % scheduler_step == 0:
            path = os.path.join(checkpoint_dir, f"agi_epoch{epoch}.pt")
            torch.save({
                'epoch': epoch,
                'model': model.state_dict(),
                'optim': model.main_optimizer.state_dict(),
                # The inner optimizer keeps separate Adam state; store it too
                # so a resumed run does not restart it from scratch.
                'self_optim': model.self_optimizer.state_dict(),
                'sched': scheduler.state_dict()
            }, path)
            print(f"Checkpoint → {path}")


# -----------------------------------------------------------------------------
# 4. Evaluation & Plots
# -----------------------------------------------------------------------------
def _mean_abs_error_by_uncertainty(uncert, y_err, num_edges=10):
    """Average ``y_err`` within equal-width bins of ``uncert``; skip empty bins.

    Returns ``(bin_centers, mean_errors)`` restricted to bins that contain at
    least one sample.
    """
    edges = np.linspace(uncert.min(), uncert.max(), num_edges)
    num_bins = num_edges - 1
    # digitize puts values equal to the last edge one past the final bin;
    # clip so the largest-uncertainty samples are counted in the last bin.
    bin_ids = np.clip(np.digitize(uncert, edges) - 1, 0, num_bins - 1)
    counts = np.bincount(bin_ids, minlength=num_bins)
    totals = np.bincount(bin_ids, weights=y_err, minlength=num_bins)
    occupied = counts > 0
    centers = (edges[:-1] + edges[1:]) / 2
    return centers[occupied], totals[occupied] / counts[occupied]


def evaluate_and_plot(model: SelfRecursiveAGI,
                      val_loader: DataLoader,
                      device: torch.device):
    """Run MC-Dropout over ``val_loader`` and show three diagnostic plots.

    The plots are: true vs. predicted scatter, a histogram of absolute
    errors, and mean absolute error per bin of predicted standard deviation
    drawn against a dashed ``y = x`` reference.

    Args:
        model: Network exposing ``mc_dropout_predict``.
        val_loader: Iterable yielding ``(inputs, targets)`` batches.
        device: Device each input batch is moved to before prediction.

    Raises:
        ValueError: If ``val_loader`` yields no batches.
    """
    model.eval()
    ys, preds_mean, preds_std = [], [], []

    for Xb, yb in val_loader:
        Xb = Xb.to(device)
        mean, std = model.mc_dropout_predict(Xb, mc_samples=50)
        # Flatten to 1-D explicitly: a bare squeeze() turns a single-sample
        # batch into a 0-d array, which np.concatenate rejects.
        preds_mean.append(np.asarray(mean).reshape(-1))
        preds_std.append(np.asarray(std).reshape(-1))
        ys.append(yb.detach().cpu().numpy().reshape(-1))

    if not ys:
        raise ValueError("val_loader produced no batches to evaluate")

    y_true = np.concatenate(ys)
    y_pred = np.concatenate(preds_mean)
    y_err  = np.abs(y_pred - y_true)
    uncert = np.concatenate(preds_std)

    # 1) Scatter True vs Pred
    plt.figure(figsize=(5,5))
    plt.scatter(y_true, y_pred, alpha=0.6)
    lims = [y_true.min(), y_true.max()]
    plt.plot(lims, lims, '--', color='gray')
    plt.xlabel("True y")
    plt.ylabel("Predicted y")
    plt.title("True vs. Predicted")
    plt.tight_layout()
    plt.show()

    # 2) Residual Histogram
    plt.figure()
    plt.hist(y_err, bins=30, alpha=0.7)
    plt.xlabel("Absolute Error")
    plt.title("Residuals")
    plt.tight_layout()
    plt.show()

    # 3) Reliability Diagram
    # Bin by predicted uncertainty, compare mean error vs uncertainty.
    # Empty bins are left out rather than drawn as zero error.
    bin_center, mean_err = _mean_abs_error_by_uncertainty(uncert, y_err)

    plt.figure()
    plt.plot(bin_center, mean_err, '-o')
    plt.plot([uncert.min(), uncert.max()],
             [uncert.min(), uncert.max()],
             '--', color='gray')
    plt.xlabel("Predicted σ")
    plt.ylabel("Empirical |Error|")
    plt.title("Reliability Diagram")
    plt.tight_layout()
    plt.show()


# -----------------------------------------------------------------------------
# 5. Main Entry Point
# -----------------------------------------------------------------------------
if __name__ == "__main__":
    # Hyperparameters: data and architecture
    BATCH_SIZE = 32
    INPUT_SIZE, HIDDEN_SIZE, OUTPUT_SIZE = 10, 64, 1

    # Hyperparameters: optimisation
    LR_MAIN = 1e-3
    LR_SELF = 1e-3
    EPOCHS, SELF_STEPS = 20, 3
    DROP_P = 0.1
    CLIP_GRAD = 1.0
    SCHED_STEP, SCHED_GAMMA = 5, 0.5

    # Use the GPU when one is visible, otherwise fall back to the CPU.
    if torch.cuda.is_available():
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # Data: 2500 synthetic samples, 500 of them held out for validation.
    ds = SyntheticRegressionDataset(num_samples=2500,
                                    input_size=INPUT_SIZE,
                                    val_split=500)
    train_loader, val_loader = ds.get_loaders(batch_size=BATCH_SIZE)

    # Model and loss
    model = SelfRecursiveAGI(input_size=INPUT_SIZE,
                             hidden_size=HIDDEN_SIZE,
                             output_size=OUTPUT_SIZE,
                             lr_main=LR_MAIN,
                             lr_self=LR_SELF,
                             dropout_p=DROP_P)
    criterion = nn.MSELoss()

    # Train (outer step + inner self-improvement per batch)
    train_self_recursive(model,
                         train_loader,
                         criterion,
                         device,
                         epochs=EPOCHS,
                         self_steps=SELF_STEPS,
                         clip_grad_norm=CLIP_GRAD,
                         scheduler_step=SCHED_STEP,
                         scheduler_gamma=SCHED_GAMMA,
                         checkpoint_dir='checkpoints')

    # Evaluate on the held-out split and draw the diagnostic plots
    evaluate_and_plot(model, val_loader, device)