<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/multiverse_regression_quantile_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#!/usr/bin/env python3
"""
multiverse_regression_quantile.py

1. Generate synthetic 5-D compactification parameters and continuous scores.
2. Expand features: sin, cos, and pairwise sin-sin interactions.
3. Train an MLP regressor with MSE loss, early stopping, and LR scheduling.
4. Evaluate continuous MSE and discretize predictions using quantile bins.
5. Report binned accuracy, classification report, and confusion matrix.
"""

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    mean_squared_error,
    accuracy_score,
    classification_report,
    confusion_matrix
)
from torch.utils.data import TensorDataset, DataLoader

# 1. Data Generation
def generate_data(n_samples=15000, input_dim=5, seed=42):
    """Draw random angle vectors and score each one by the sum of its sines.

    Args:
        n_samples: Number of rows to generate.
        input_dim: Number of angle parameters per row.
        seed: Value passed to ``np.random.seed``. This reseeds NumPy's
            *global* generator, so later ``np.random`` calls are affected.

    Returns:
        A ``(params, scores)`` tuple of float32 arrays: ``params`` has shape
        ``(n_samples, input_dim)`` with values drawn uniformly between 0 and
        2*pi, and ``scores`` has shape ``(n_samples,)`` holding
        ``sum_i sin(params[:, i])``.
    """
    np.random.seed(seed)

    # Sample in float64, then down-cast so the targets are computed from the
    # same float32 values that are returned to the caller.
    shape = (n_samples, input_dim)
    angles = np.random.uniform(low=0.0, high=2.0 * np.pi, size=shape)
    angles = angles.astype(np.float32)

    targets = np.sin(angles).sum(axis=1).astype(np.float32)
    return angles, targets

# 2. Feature Expansion
def expand_features(params):
    """Build trigonometric features from raw angle parameters.

    For ``params`` of shape ``(n, d)`` the output columns are, in order:
      - sin(param_i), cos(param_i) interleaved for i = 0..d-1 -> 2*d features
      - sin(param_i) * sin(param_j) for every pair i < j      -> d*(d-1)/2 features
    For d = 5 this gives 10 + 10 = 20 columns.

    Args:
        params: 2-D NumPy array of angles, one row per sample.

    Returns:
        float32 array of shape ``(n, 2*d + d*(d-1)//2)``.

    Raises:
        ValueError: If ``params`` is not 2-D, or has zero columns (there is
            then nothing to stack).
    """
    _, d = params.shape

    # Evaluate each column's sine exactly once; the pairwise products below
    # reuse these arrays instead of recomputing sin() for every (i, j) pair.
    sines = [np.sin(params[:, i]) for i in range(d)]

    feats = []
    # sin and cos, interleaved per input column
    for i in range(d):
        feats.append(sines[i])
        feats.append(np.cos(params[:, i]))
    # pairwise sin * sin for i < j
    for i in range(d):
        for j in range(i + 1, d):
            feats.append(sines[i] * sines[j])
    return np.stack(feats, axis=1).astype(np.float32)

# 3. Regression Model
class MultiverseRegressor(nn.Module):
    """Fully connected regressor mapping a feature vector to one scalar.

    Every hidden layer is ``Linear -> ReLU -> Dropout``; the final ``Linear``
    has a single output unit and no activation or dropout.

    Args:
        input_dim: Number of input features.
        hidden_dims: Iterable of hidden-layer widths (list, tuple, ...). An
            empty iterable yields a single ``Linear(input_dim, 1)``.
        drop_p: Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_dim=20, hidden_dims=(64, 32), drop_p=0.3):
        super().__init__()
        # Unpacking copies the widths into a fresh list, so any iterable is
        # accepted and the (now immutable) default is never shared or mutated.
        dims = [input_dim, *hidden_dims, 1]
        last = len(dims) - 2  # index of the output Linear layer

        layers = []
        for i, (n_in, n_out) in enumerate(zip(dims[:-1], dims[1:])):
            layers.append(nn.Linear(n_in, n_out))
            if i < last:
                layers.append(nn.ReLU())
                layers.append(nn.Dropout(drop_p))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return predictions with the trailing size-1 output axis squeezed away."""
        return self.net(x).squeeze(-1)

# 4. Training Loop with Early Stopping
def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return the sample-weighted mean loss.

    With an ``optimizer`` the model is put in train mode and updated after
    every batch; without one it is put in eval mode and gradients are off.
    Returns ``nan`` when the loader yields no samples.
    """
    training = optimizer is not None
    model.train(training)

    total_loss, n_seen = 0.0, 0
    with torch.set_grad_enabled(training):
        for xb, yb in loader:
            xb, yb = xb.to(device), yb.to(device)
            if training:
                optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            if training:
                loss.backward()
                optimizer.step()
            # The criterion returns a per-batch mean; weight it by the batch
            # size so a short final batch does not skew the epoch average.
            batch_n = yb.size(0)
            total_loss += loss.item() * batch_n
            n_seen += batch_n
    return total_loss / n_seen if n_seen else float("nan")


def train_regressor(
    model, train_loader, val_loader,
    epochs=100, lr=1e-3, patience=10, device=None,
    checkpoint_path="best_regressor.pth",
):
    """Train ``model`` with MSE loss, LR-on-plateau scheduling and early stopping.

    Each epoch runs one training pass and one validation pass. Whenever the
    validation MSE improves, the model's ``state_dict`` is written to
    ``checkpoint_path``. Training stops once validation has failed to improve
    for ``patience`` consecutive epochs. Progress is printed on the first
    epoch and every tenth epoch.

    Args:
        model: Module whose output for a batch matches the target's shape.
        train_loader: Iterable of ``(inputs, targets)`` training batches.
        val_loader: Iterable of ``(inputs, targets)`` validation batches.
        epochs: Maximum number of epochs.
        lr: Initial Adam learning rate.
        patience: Epochs without validation improvement before stopping.
        device: Torch device; defaults to CUDA when available, else CPU.
        checkpoint_path: Where the best ``state_dict`` is saved.

    Returns:
        The best (lowest) validation MSE seen, as a float; ``inf`` if no
        epoch ever improved on the initial value.

    Note:
        ``model`` is moved to ``device`` and left there on return.
    """
    device = device or torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)

    optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-5)
    # Halve the learning rate after 5 epochs without validation improvement.
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, mode="min", factor=0.5, patience=5
    )
    criterion = nn.MSELoss()

    best_val = float("inf")
    no_improve = 0

    for epoch in range(1, epochs + 1):
        avg_train = _run_epoch(model, train_loader, criterion, device, optimizer)
        avg_val = _run_epoch(model, val_loader, criterion, device)
        scheduler.step(avg_val)

        if avg_val < best_val:
            best_val = avg_val
            no_improve = 0
            torch.save(model.state_dict(), checkpoint_path)
        else:
            no_improve += 1

        if epoch == 1 or epoch % 10 == 0:
            print(f"Epoch {epoch:03d} ─ Train MSE: {avg_train:.6f} ─ Val MSE: {avg_val:.6f}")

        if no_improve >= patience:
            print(f"Early stopping at epoch {epoch}. Best Val MSE: {best_val:.6f}")
            break

    return best_val

# 5. Main Execution
if __name__ == "__main__":
    # End-to-end run: synthesize data, train the regressor, then report both
    # continuous (MSE) and quantile-binned (classification-style) metrics.

    # Hyperparameters
    ORIG_DIM    = 5
    N_SAMPLES   = 15000
    TEST_SIZE   = 0.20
    BATCH_SIZE  = 256
    EPOCHS      = 100
    LEARNING_RT = 1e-3
    PATIENCE    = 10
    N_BINS      = 10

    # Generate raw data
    params, scores = generate_data(N_SAMPLES, ORIG_DIM)

    # Expand features; take the model's input width from the result so that
    # changing ORIG_DIM cannot leave the two out of sync.
    X = expand_features(params)
    EXP_DIM = X.shape[1]

    # Train/validation split
    X_tr, X_val, y_tr, y_val = train_test_split(
        X, scores, test_size=TEST_SIZE, random_state=42
    )

    # DataLoaders
    train_ds = TensorDataset(torch.from_numpy(X_tr), torch.from_numpy(y_tr))
    val_ds   = TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val))
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True)
    val_loader   = DataLoader(val_ds, batch_size=BATCH_SIZE)

    # Initialize and train model
    model = MultiverseRegressor(input_dim=EXP_DIM, hidden_dims=[64,32], drop_p=0.3)
    train_regressor(model, train_loader, val_loader,
                    epochs=EPOCHS, lr=LEARNING_RT, patience=PATIENCE)

    # Load best model. Training may have moved the model to the GPU, so load
    # the checkpoint onto whatever device the parameters currently live on.
    device = next(model.parameters()).device
    model.load_state_dict(torch.load("best_regressor.pth", map_location=device))
    model.eval()

    # Predictions: inputs must sit on the model's device, and results must be
    # brought back to the CPU before converting to NumPy.
    with torch.no_grad():
        y_pred_tr  = model(torch.from_numpy(X_tr).to(device)).cpu().numpy()
        y_pred_val = model(torch.from_numpy(X_val).to(device)).cpu().numpy()

    # Continuous evaluation
    mse_tr  = mean_squared_error(y_tr, y_pred_tr)
    mse_val = mean_squared_error(y_val, y_pred_val)
    print(f"\nFinal MSE ─ Train: {mse_tr:.6f} ─ Val: {mse_val:.6f}")

    # Quantile-based binning. Only the N_BINS-1 interior edges are used, so
    # every value lands in a bin 0..N_BINS-1: anything at or below the lowest
    # training quantile goes to bin 0 and anything above the highest goes to
    # the last bin, instead of spilling into extra out-of-range classes.
    edges       = np.quantile(y_tr, np.linspace(0, 1, N_BINS + 1))
    inner_edges = edges[1:-1]
    true_bin    = np.digitize(y_val, inner_edges, right=True)
    pred_bin    = np.digitize(y_pred_val, inner_edges, right=True)

    # Classification metrics
    acc = accuracy_score(true_bin, pred_bin)
    print(f"\nBinned Accuracy: {acc:.3f}\n")
    print("Classification Report:\n", classification_report(true_bin, pred_bin, digits=4))
    print("Confusion Matrix:\n", confusion_matrix(true_bin, pred_bin))