<a href="https://colab.research.google.com/github/OneFineStarstuff/Cosmic-Brilliance/blob/main/pinn_full_pipeline_analysis_py.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install torch numpy matplotlib scikit-learn scipy umap-learn

In [None]:
# pinn_full_pipeline_analysis.py

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
from sklearn.linear_model import LogisticRegression
from scipy.stats import pearsonr
import umap
import random

# Reproducibility: a single seed shared by the stdlib, NumPy and PyTorch RNGs.
SEED = 42
random.seed(SEED)        # Python's built-in `random` module
np.random.seed(SEED)     # NumPy global RNG (used for the synthetic noise)
torch.manual_seed(SEED)  # PyTorch RNG (weight init, DataLoader shuffling)


# -----------------------------------------------------------------------------
# 1. Synthetic Data Generators
# -----------------------------------------------------------------------------
def generate_dissolution_data(n_samples=500, *, k=0.5, C0=1.0, t_max=10.0, noise_std=0.01):
    """Generate noisy samples of an exponential decay C(t) = C0 * exp(-k * t).

    Args:
        n_samples: Number of evenly spaced time points in [0, t_max].
        k: Decay rate constant.
        C0: Value of C at t = 0.
        t_max: Upper end of the time grid (the grid always starts at 0).
        noise_std: Standard deviation of the additive Gaussian noise.

    Returns:
        Tuple ``(t, C)`` of float32 tensors, each of shape (n_samples, 1).

    With the default arguments the output is identical to the previous
    hard-coded version (k=0.5, C0=1.0, t in [0, 10], noise 0.01).
    """
    # Column vector so each sample is one row, as expected by nn.Linear(1, ...).
    t = np.linspace(0, t_max, n_samples)[:, None]
    # Noise is drawn from NumPy's global RNG, so np.random.seed controls it.
    C = C0 * np.exp(-k * t) + noise_std * np.random.randn(*t.shape)
    return torch.tensor(t, dtype=torch.float32), torch.tensor(C, dtype=torch.float32)


def generate_accumulation_data(n_samples=500, *, k=0.3, M0=0.5, t_max=10.0, noise_std=0.01):
    """Generate noisy samples of an exponential growth M(t) = M0 * exp(k * t).

    Args:
        n_samples: Number of evenly spaced time points in [0, t_max].
        k: Growth rate constant.
        M0: Value of M at t = 0.
        t_max: Upper end of the time grid (the grid always starts at 0).
        noise_std: Standard deviation of the additive Gaussian noise.

    Returns:
        Tuple ``(t, M)`` of float32 tensors, each of shape (n_samples, 1).

    With the default arguments the output is identical to the previous
    hard-coded version (k=0.3, M0=0.5, t in [0, 10], noise 0.01).
    """
    # Column vector so each sample is one row, as expected by nn.Linear(1, ...).
    t = np.linspace(0, t_max, n_samples)[:, None]
    # Noise is drawn from NumPy's global RNG, so np.random.seed controls it.
    M = M0 * np.exp(k * t) + noise_std * np.random.randn(*t.shape)
    return torch.tensor(t, dtype=torch.float32), torch.tensor(M, dtype=torch.float32)


# -----------------------------------------------------------------------------
# 2. PINN Model Definition
# -----------------------------------------------------------------------------
class PINN(nn.Module):
    """Small fully connected network: two Tanh hidden layers and a linear head.

    Args:
        in_dim: Size of each input sample.
        hidden: Width of both hidden layers.
        out_dim: Size of each output sample.
    """

    def __init__(self, in_dim=1, hidden=64, out_dim=1):
        super().__init__()
        # Layers are created in forward order so weight initialisation consumes
        # the RNG in the same sequence and the Sequential indices stay 0..4.
        stack = [
            nn.Linear(in_dim, hidden),
            nn.Tanh(),
            nn.Linear(hidden, hidden),
            nn.Tanh(),
            nn.Linear(hidden, out_dim),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the layer stack to ``x`` and return the result."""
        out = self.net(x)
        return out


# -----------------------------------------------------------------------------
# 3. Physics Residual Functions
# -----------------------------------------------------------------------------
def residual_dissolution(model, t, k=0.5):
    """Return the pointwise residual of the decay ODE ``dC/dt + k * C = 0``.

    Args:
        model: Callable mapping a time tensor to the predicted quantity C.
        t: Floating-point tensor of time points. It is NOT modified; the
            derivative is taken with respect to an internal detached copy.
        k: Decay rate constant (defaults to the value used by the data
            generator in this file).

    Returns:
        Tensor ``dC/dt + k * C`` evaluated at ``t``. It stays attached to the
        autograd graph (``create_graph=True``) so a loss built on it can be
        back-propagated into the model parameters.
    """
    # Previously `t.requires_grad_(True)` flipped the flag on the caller's
    # tensor in place; use a private leaf copy instead.
    t = t.detach().clone().requires_grad_(True)
    C_pred = model(t)
    # Gradient of the summed output w.r.t. t gives per-sample derivatives,
    # assuming the model treats each row independently (true for a plain MLP).
    dC_dt = torch.autograd.grad(C_pred.sum(), t, create_graph=True)[0]
    return dC_dt + k * C_pred  # dC/dt + k C = 0


def residual_accumulation(model, t, k=0.3):
    """Return the pointwise residual of the growth ODE ``dM/dt - k * M = 0``.

    Args:
        model: Callable mapping a time tensor to the predicted quantity M.
        t: Floating-point tensor of time points. It is NOT modified; the
            derivative is taken with respect to an internal detached copy.
        k: Growth rate constant (defaults to the value used by the data
            generator in this file).

    Returns:
        Tensor ``dM/dt - k * M`` evaluated at ``t``. It stays attached to the
        autograd graph (``create_graph=True``) so a loss built on it can be
        back-propagated into the model parameters.
    """
    # Previously `t.requires_grad_(True)` flipped the flag on the caller's
    # tensor in place; use a private leaf copy instead.
    t = t.detach().clone().requires_grad_(True)
    M_pred = model(t)
    # Gradient of the summed output w.r.t. t gives per-sample derivatives,
    # assuming the model treats each row independently (true for a plain MLP).
    dM_dt = torch.autograd.grad(M_pred.sum(), t, create_graph=True)[0]
    return dM_dt - k * M_pred  # dM/dt - k M = 0


# -----------------------------------------------------------------------------
# 4. Training Loop
# -----------------------------------------------------------------------------
def train_pinn(model, loader, res_fn, epochs=100, phy_lambda=1.0, lr=1e-3, tag="PINN"):
    """Train ``model`` with a data-fit loss plus a weighted physics-residual loss.

    Args:
        model: The network to optimise (updated in place).
        loader: Re-iterable yielding ``(t_batch, y_batch)`` pairs, e.g. a
            DataLoader. It does not need to support ``len()``.
        res_fn: Callable ``res_fn(model, t_batch)`` returning the ODE residual;
            it is driven towards zero.
        epochs: Number of passes over ``loader``.
        phy_lambda: Weight of the physics loss relative to the data loss.
        lr: Adam learning rate.
        tag: Prefix used in the per-epoch log line.

    Returns:
        The same ``model`` object, after training.
    """
    opt = optim.Adam(model.parameters(), lr=lr)
    mse = nn.MSELoss()
    for ep in range(1, epochs + 1):
        data_loss = 0.0
        phy_loss = 0.0
        # Count batches while iterating instead of calling len(loader): this
        # works for iterables without __len__ and avoids dividing by zero
        # when the loader yields nothing.
        n_batches = 0
        for t_b, y_b in loader:
            opt.zero_grad()
            y_pred = model(t_b)
            ld = mse(y_pred, y_b)
            res = res_fn(model, t_b)
            lp = mse(res, torch.zeros_like(res))
            loss = ld + phy_lambda * lp
            loss.backward()
            opt.step()
            data_loss += ld.item()
            phy_loss += lp.item()
            n_batches += 1

        # Unweighted mean of per-batch losses; an empty epoch reports 0.0000.
        denom = max(n_batches, 1)
        print(f"[{tag}] Epoch {ep:3d}/{epochs} — Data: {data_loss / denom:.4f}, Phy: {phy_loss / denom:.4f}")
    return model


# -----------------------------------------------------------------------------
# 5. MC-Dropout Inference
# -----------------------------------------------------------------------------
def mc_dropout_predict(model, t, samples=50):
    """Run repeated stochastic forward passes and summarise them.

    The model is switched to training mode for the duration of the call so
    that dropout layers (if any) stay active, then restored to whatever mode
    it was in before. If the model has no stochastic layers every pass is the
    same and the returned std is (numerically) zero.

    Args:
        model: ``nn.Module`` to evaluate.
        t: Input tensor passed unchanged to ``model`` on every pass.
        samples: Number of forward passes; must be at least 1.

    Returns:
        Tuple ``(mean, std)`` of flattened NumPy arrays computed across the
        ``samples`` passes.

    Raises:
        ValueError: If ``samples`` is smaller than 1.
    """
    if samples < 1:
        raise ValueError(f"samples must be >= 1, got {samples}")

    was_training = model.training
    model.train()  # keep dropout on (if any)
    try:
        # Only the values are needed, so skip building autograd graphs.
        with torch.no_grad():
            preds = [model(t).cpu().numpy() for _ in range(samples)]
    finally:
        # Do not leave the caller's model in a different mode than we found it.
        model.train(was_training)

    arr = np.stack(preds, axis=0)
    return arr.mean(axis=0).flatten(), arr.std(axis=0).flatten()


# -----------------------------------------------------------------------------
# 6. Main Pipeline
# -----------------------------------------------------------------------------
def main():
    """Train both PINNs on synthetic data and analyse their hidden features.

    Steps, in order: generate the two synthetic datasets, train one PINN per
    dataset, run MC-dropout inference on a common time grid, extract the last
    hidden-layer activations, project them to 2-D with UMAP, then (a) plot the
    embedding by model, (b) cluster it with KMeans and report the silhouette
    score, (c) fit a logistic-regression classifier on it, and (d) correlate
    distance-to-cluster-centre with the MC-dropout std.

    Results are printed and shown via matplotlib; nothing is returned.
    """
    # Generate synthetic datasets
    t_diss, C = generate_dissolution_data()
    t_acc, M = generate_accumulation_data()

    # Create DataLoaders
    bs = 128
    loader_d = DataLoader(TensorDataset(t_diss, C), batch_size=bs, shuffle=True)
    loader_a = DataLoader(TensorDataset(t_acc, M), batch_size=bs, shuffle=True)

    # Instantiate and train models
    model_d = train_pinn(PINN(), loader_d, residual_dissolution, tag="DissolutionAI")
    model_a = train_pinn(PINN(), loader_a, residual_accumulation, tag="PreComputationalAI")

    # Inference grid
    t_full = torch.linspace(0, 10, 500)[:, None]
    mean_d, std_d = mc_dropout_predict(model_d, t_full)
    mean_a, std_a = mc_dropout_predict(model_a, t_full)
    # NOTE(review): the PINN class in this file contains no dropout layers, so
    # these std values are expected to be ~0 (floating-point noise only) and
    # the uncertainty analysis further down is not meaningful until a model
    # with dropout is used.

    print("\nDissolutionAI Mean shape:", mean_d.shape, "Std shape:", std_d.shape)
    print("PreComputationalAI Mean shape:", mean_a.shape, "Std shape:", std_a.shape)

    # Extract latent features (last hidden layer) for UMAP
    def extract_features(model, t):
        """Return the activations feeding the final output layer as a NumPy array."""
        # Run every layer except the last one instead of hard-coding indices;
        # no gradients are needed for feature extraction.
        with torch.no_grad():
            return model.net[:-1](t).cpu().numpy()

    feats_d = extract_features(model_d, t_full)
    feats_a = extract_features(model_a, t_full)
    feats = np.vstack([feats_d, feats_a])

    # UMAP projection
    reducer = umap.UMAP(n_components=2, random_state=SEED)
    emb = reducer.fit_transform(feats)
    print("UMAP embedding computed:", emb.shape)

    # -------------------------
    # Analysis: Visualization
    # -------------------------
    std_all = np.concatenate([std_d, std_a])
    # 0 = dissolution model, 1 = accumulation model; same row order as `feats`.
    labels = np.array([0] * len(feats_d) + [1] * len(feats_a))
    # Select rows by label rather than a hard-coded 500 so the plot stays
    # correct if the inference grid size changes.
    is_diss = labels == 0

    plt.figure(figsize=(7, 6))
    plt.scatter(emb[is_diss, 0], emb[is_diss, 1], c='C0', alpha=0.6, label='Dissolution')
    plt.scatter(emb[~is_diss, 0], emb[~is_diss, 1], c='C1', alpha=0.6, label='Accumulation')
    plt.title('UMAP of PINN Latent Features')
    plt.xlabel('UMAP1')
    plt.ylabel('UMAP2')
    plt.legend()
    plt.tight_layout()
    plt.show()

    # -------------------------
    # Analysis: Clustering
    # -------------------------
    kmeans = KMeans(n_clusters=2, random_state=SEED)
    cluster_ids = kmeans.fit_predict(emb)
    sil = silhouette_score(emb, cluster_ids)
    print(f"KMeans Silhouette Score: {sil:.4f}")

    plt.figure(figsize=(7, 6))
    plt.scatter(emb[:, 0], emb[:, 1], c=cluster_ids, cmap='tab10', alpha=0.6)
    centers = kmeans.cluster_centers_
    plt.scatter(centers[:, 0], centers[:, 1], c='k', s=80, marker='X', label='Centers')
    plt.title('KMeans on UMAP Embedding')
    plt.legend()  # the 'Centers' label was set but never displayed
    plt.tight_layout()
    plt.show()

    # -------------------------
    # Analysis: Classification
    # -------------------------
    clf = LogisticRegression()
    clf.fit(emb, labels)
    # Scored on the same points it was fitted on, i.e. this is training accuracy.
    acc = clf.score(emb, labels)
    print(f"Logistic Regression Accuracy on embeddings: {acc:.4f}")

    # -------------------------
    # Analysis: Uncertainty vs Distance
    # -------------------------
    # Distance from each embedded point to the centre of its assigned cluster.
    dists = np.linalg.norm(emb - centers[cluster_ids], axis=1)
    corr, pval = pearsonr(dists, std_all)
    print(f"Pearson r (distance vs uncertainty): {corr:.4f}, p={pval:.2e}")

    plt.figure(figsize=(6, 4))
    plt.scatter(dists, std_all, alpha=0.6)
    plt.xlabel('Distance to Cluster Center')
    plt.ylabel('MC-Dropout Std')
    plt.title('Uncertainty vs Embedding Distance')
    plt.tight_layout()
    plt.show()


# Run the full pipeline only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()