<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/multiverse_regression_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
"""
multiverse_regression.py

Predict continuous topology scores from string-theory compactification
parameters using a regression neural network. Then convert continuous
predictions into discrete bins for classification metrics.
"""

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    mean_squared_error,
    accuracy_score,
    confusion_matrix,
    classification_report
)

# 1. Synthetic Dataset Generation
def generate_data(n_samples=15000, input_dim=5, seed=42):
    """Generate a synthetic regression dataset.

    Each sample is a vector of ``input_dim`` values drawn uniformly from
    ``[0, 2*pi)``; its target is the sum of the sines of those values.

    Args:
        n_samples: Number of samples (rows) to generate.
        input_dim: Number of parameters (columns) per sample.
        seed: Seed for the random stream used to draw the parameters.

    Returns:
        A tuple ``(params, scores)`` where ``params`` is a float32 array of
        shape ``(n_samples, input_dim)`` and ``scores`` is a float32 array of
        shape ``(n_samples,)``.
    """
    # A private RandomState yields the same stream as np.random.seed(seed)
    # followed by np.random.uniform(...), but leaves the process-wide NumPy
    # RNG untouched so calling this function has no hidden side effect.
    rng = np.random.RandomState(seed)
    params = rng.uniform(0, 2 * np.pi, size=(n_samples, input_dim)).astype(np.float32)
    scores = np.sum(np.sin(params), axis=1).astype(np.float32)
    return params, scores

# 2. Regression Model Definition
class MultiverseRegressor(nn.Module):
    """Fully connected network that maps a parameter vector to one scalar.

    The network is a stack of ``Linear -> ReLU -> Dropout`` stages, one per
    entry in ``hidden_dims``, followed by a final ``Linear`` layer with a
    single output unit (no activation or dropout after it).

    Args:
        input_dim: Number of input features.
        hidden_dims: Sequence of hidden layer widths (list or tuple). An
            empty sequence yields a single linear layer.
        drop_p: Dropout probability applied after every hidden activation.
    """

    def __init__(self, input_dim=5, hidden_dims=(64, 32), drop_p=0.3):
        super().__init__()
        # Unpacking accepts any sequence (list, tuple, ...) and avoids a
        # mutable default argument.
        dims = [input_dim, *hidden_dims, 1]
        last_index = len(dims) - 2
        layers = []
        for i, (n_in, n_out) in enumerate(zip(dims[:-1], dims[1:])):
            layers.append(nn.Linear(n_in, n_out))
            # Activation and dropout follow every layer except the output one.
            if i < last_index:
                layers.append(nn.ReLU())
                layers.append(nn.Dropout(drop_p))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the prediction with the trailing size-1 output dim removed."""
        return self.net(x).squeeze(-1)

# 3. Training Loop with Early Stopping
def train_regressor(
    model, train_loader, val_loader,
    epochs=100, lr=1e-3, patience=10, device=None,
    checkpoint_path="best_regressor.pth"
):
    """Train ``model`` with MSE loss, LR scheduling and early stopping.

    The model is moved to ``device`` in place and optimised with Adam
    (weight decay 1e-5). After every epoch the validation MSE drives a
    ``ReduceLROnPlateau`` scheduler; whenever it improves, the model's
    ``state_dict`` is written to ``checkpoint_path``. Training stops once
    the validation MSE has failed to improve for ``patience`` consecutive
    epochs. On return the model holds the weights of the last epoch run,
    not necessarily the best ones; reload the checkpoint for those.

    Args:
        model: Module whose output has the same shape as the target batch.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        epochs: Maximum number of epochs.
        lr: Initial learning rate for Adam.
        patience: Epochs without validation improvement before stopping.
        device: Device to train on; defaults to CUDA when available,
            otherwise CPU.
        checkpoint_path: File that receives the best ``state_dict``.

    Returns:
        The best (lowest) validation MSE observed, as a float. It is
        ``inf`` if no epoch was run.

    Raises:
        ValueError: If either loader yields no samples during an epoch.
    """
    device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.5, patience=5
    )
    criterion = nn.MSELoss()

    best_val_loss = float("inf")
    epochs_no_improve = 0

    for epoch in range(1, epochs + 1):
        # Training
        model.train()
        train_sum, train_count = 0.0, 0
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()
            # Weight each batch mean by its size so that a smaller final
            # batch does not skew the epoch average.
            train_sum += loss.item() * len(yb)
            train_count += len(yb)

        # Validation
        model.eval()
        val_sum, val_count = 0.0, 0
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                val_sum += criterion(model(xb), yb).item() * len(yb)
                val_count += len(yb)

        # An empty loader would otherwise produce a NaN/undefined average
        # and silently prevent any checkpoint from being written.
        if train_count == 0:
            raise ValueError("train_loader yielded no samples")
        if val_count == 0:
            raise ValueError("val_loader yielded no samples")

        avg_train = train_sum / train_count
        avg_val = val_sum / val_count
        scheduler.step(avg_val)

        if avg_val < best_val_loss:
            best_val_loss = avg_val
            epochs_no_improve = 0
            torch.save(model.state_dict(), checkpoint_path)
        else:
            epochs_no_improve += 1

        if epoch == 1 or epoch % 10 == 0:
            print(
                f"Epoch {epoch:03d} ─ "
                f"Train MSE: {avg_train:.6f} ─ "
                f"Val MSE: {avg_val:.6f}"
            )

        if epochs_no_improve >= patience:
            print(f"Early stopping at epoch {epoch}. "
                  f"Best Val MSE: {best_val_loss:.6f}")
            break

    return best_val_loss

# 4. Main Execution
if __name__ == "__main__":
    # Script entry point: build the synthetic dataset, train the regressor,
    # reload the best checkpoint and report both continuous (MSE) and
    # binned (classification-style) metrics on the validation split.

    # Hyperparameters
    INPUT_DIM   = 5
    N_SAMPLES   = 15000
    TEST_SIZE   = 0.2
    BATCH_SIZE  = 256
    EPOCHS      = 100
    LR          = 1e-3
    PATIENCE    = 10
    HIDDEN_DIMS = [64, 32]
    DROP_P      = 0.3
    N_BINS      = 10  # for discrete evaluation

    # Generate continuous data
    X, y = generate_data(N_SAMPLES, INPUT_DIM)

    # Train/validation split
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=TEST_SIZE, random_state=42
    )

    # DataLoaders
    train_ds = TensorDataset(torch.from_numpy(X_train), torch.from_numpy(y_train))
    val_ds   = TensorDataset(torch.from_numpy(X_val),   torch.from_numpy(y_val))
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_loader   = DataLoader(val_ds,   batch_size=BATCH_SIZE)

    # Initialize and train model
    model = MultiverseRegressor(
        input_dim=INPUT_DIM,
        hidden_dims=HIDDEN_DIMS,
        drop_p=DROP_P
    )
    train_regressor(
        model, train_loader, val_loader,
        epochs=EPOCHS, lr=LR, patience=PATIENCE
    )

    # Load best model. train_regressor moves the model to CUDA when it is
    # available, so look up where the parameters actually live and keep the
    # checkpoint and the evaluation inputs on that same device.
    device = next(model.parameters()).device
    model.load_state_dict(torch.load("best_regressor.pth", map_location=device))
    model.eval()

    # Final continuous evaluation (results are copied back to the CPU before
    # conversion to NumPy, which does not accept CUDA tensors).
    with torch.no_grad():
        y_pred_train = model(torch.from_numpy(X_train).to(device)).cpu().numpy()
        y_pred_val   = model(torch.from_numpy(X_val).to(device)).cpu().numpy()

    mse_train = mean_squared_error(y_train, y_pred_train)
    mse_val   = mean_squared_error(y_val,   y_pred_val)
    print(f"\nFinal MSE ─ Train: {mse_train:.6f} ─ Val: {mse_val:.6f}")

    # Map to discrete bins for classification metrics. Digitizing against
    # the N_BINS - 1 interior edges always yields an index in
    # 0..N_BINS-1: the maximum target lands in the last bin instead of a
    # spurious extra one, and predictions outside the observed target range
    # are assigned to the nearest end bin rather than to -1 or N_BINS.
    edges       = np.linspace(y.min(), y.max(), N_BINS + 1)
    inner_edges = edges[1:-1]
    true_bins   = np.digitize(y_val, inner_edges)
    pred_bins   = np.digitize(y_pred_val, inner_edges)

    acc = accuracy_score(true_bins, pred_bins)
    print(f"\nBinned Classification Accuracy: {acc:.3f}\n")
    print("Classification Report:\n", classification_report(true_bins, pred_bins, digits=4))
    print("Confusion Matrix:\n", confusion_matrix(true_bins, pred_bins))