# Task 1


In [None]:
import os, math, sys, csv, random
import json
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# One seed shared by every random number generator used in this notebook.
SEED = 42

# Seed Python's `random`, NumPy's global RNG and PyTorch, in that order.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(SEED)

# Seed all CUDA devices as well when a GPU is available.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)

In [None]:
# Location of the helper module that is loaded below and bound to `comm`.
HELPER_PATH = "Committor1D_helpers.py"

import importlib.util

# spec_from_file_location() builds a spec from the path alone and does not
# check that the file exists, so a missing helper has to be detected here for
# the explanatory message to be shown.
if not os.path.isfile(HELPER_PATH):
    raise FileNotFoundError(f"Could not load {HELPER_PATH}. Place it next to this notebook or update HELPER_PATH.")

spec = importlib.util.spec_from_file_location("comm1d_helpers", HELPER_PATH)
if spec is None or spec.loader is None:
    raise FileNotFoundError(f"Could not load {HELPER_PATH}. Place it next to this notebook or update HELPER_PATH.")
comm = importlib.util.module_from_spec(spec)

# Register the module before executing it, but do not leave a
# half-initialised entry behind if execution fails.
sys.modules["comm1d_helpers"] = comm
try:
    spec.loader.exec_module(comm)
except BaseException:
    sys.modules.pop("comm1d_helpers", None)
    raise

# Show the public names exported by the helper module.
[name for name in dir(comm) if not name.startswith("_")]

['ChebSum2_Clenshaw_matrix',
 'Cheb_coeffs2',
 'cheb',
 'np',
 'scipy',
 'solve1Dcommittor']

In [None]:
def Vprime_param(x, p0, p1, p2):
    """Evaluate ``4*x**3 - 4*x`` plus the derivative of a Gaussian bump.

    The returned value equals d/dx of
    ``x**4 - 2*x**2 + p0 * exp(-(x - p1)**2 / (2 * p2**2))``.

    Args:
        x: Scalar or NumPy array of evaluation points.
        p0: Amplitude of the Gaussian bump.
        p1: Centre of the Gaussian bump.
        p2: Width (standard deviation) of the Gaussian bump.

    Returns:
        The derivative evaluated at ``x``, with the same shape as ``x``.
    """
    offset = x - p1
    width_sq = p2**2
    envelope = np.exp(-(offset**2) / (2 * width_sq))
    quartic_slope = 4*x**3 - 4*x
    bump_slope = p0 * envelope * (-offset / width_sq)
    return quartic_slope + bump_slope

In [None]:
def make_dataset(nu=120, neval=100, N_cheb=256, seed=42, split=0.8):
    """Generate (parameters, query point, solver output) samples for random potentials.

    For each of ``nu`` potentials the parameters are drawn uniformly
    (``p0`` in [0, 5], ``p1`` in [-0.5, 0.5], ``p2`` in [0.25, 0.7]),
    ``Vprime_param`` is evaluated on a 1001-point uniform grid over [-1, 1],
    and ``comm.solve1Dcommittor`` is queried at ``neval`` points drawn
    uniformly from [-1, 1].

    Args:
        nu: Number of potentials to sample.
        neval: Number of query points per potential.
        N_cheb: Forwarded as the first argument of ``comm.solve1Dcommittor``.
        seed: Seed for ``np.random.default_rng``.
        split: Fraction of potentials whose rows go to the training set. The
            split is made by potential: the first ``int(split * nu)``
            potentials are training data, the rest are test data.

    Returns:
        ``((P_train, Y_train, Q_train), (P_test, Y_test, Q_test))`` as float32
        arrays; each ``P`` has 3 columns (un-normalised ``p0, p1, p2``) and
        each ``Y`` / ``Q`` has 1 column.
    """
    rng = np.random.default_rng(seed)
    beta = 1.0
    grid = np.linspace(-1.0, 1.0, 1001)

    param_rows, query_rows, value_rows = [], [], []
    for _ in range(nu):
        # The draw order (p0, p1, p2, then y) fixes the random stream per seed.
        p0 = rng.uniform(0.0, 5.0)
        p1 = rng.uniform(-0.5, 0.5)
        p2 = rng.uniform(0.25, 0.7)

        dV_on_grid = Vprime_param(grid, p0, p1, p2)
        y = rng.uniform(-1.0, 1.0, size=(neval,))
        qy = comm.solve1Dcommittor(N_cheb, 1.0, 0.0, dV_on_grid, grid, beta, y)

        # Every query point of this potential carries the same parameter row.
        param_rows.append(np.repeat(np.array([[p0, p1, p2]]), neval, axis=0))
        query_rows.append(y[:, None])
        value_rows.append(qy[:, None])

    P, Y, Q = (
        np.vstack(rows).astype(np.float32)
        for rows in (param_rows, query_rows, value_rows)
    )

    # Rows are grouped by potential, so cutting at a multiple of `neval`
    # keeps each potential entirely in one split.
    cut = int(split * nu) * neval
    train_idx = np.arange(cut)
    test_idx = np.arange(cut, P.shape[0])

    train = tuple(arr[train_idx] for arr in (P, Y, Q))
    test = tuple(arr[test_idx] for arr in (P, Y, Q))
    return train, test

In [None]:
# Hyperparameters for data: number of potentials, query points per potential,
# and the value forwarded to make_dataset as N_cheb.
nu, neval, N_cheb = 120, 100, 256
# Split is 80/20 by potential; the returned parameter arrays are not yet normalized.
(P_tr, Y_tr, Q_tr), (P_te, Y_te, Q_te) = make_dataset(nu=nu, neval=neval, N_cheb=N_cheb, seed=SEED, split=0.8)

# Sanity check of the array shapes produced by the split.
print("Train shapes:", P_tr.shape, Y_tr.shape, Q_tr.shape)
print("Test  shapes:",  P_te.shape, Y_te.shape, Q_te.shape)

# Normalize parameters (B)
def normalize_P(P):
    """Return a copy of ``P`` with its first three columns rescaled.

    Column 0 (``p0``) is mapped from [0, 5], column 1 (``p1``) from
    [-0.5, 0.5] and column 2 (``p2``) from [0.25, 0.7] onto [0, 1].
    The input array is not modified and the result keeps its dtype.
    """
    # (lower bound, range width) for p0, p1, p2 respectively.
    bounds = ((0.0, 5.0), (-0.5, 1.0), (0.25, 0.45))

    scaled = P.copy()
    for col, (low, width) in enumerate(bounds):
        scaled[:, col] = (scaled[:, col] - low) / width
    return scaled

# Replace the raw parameters with their normalized versions.
# NOTE: these names are rebound in place, so re-running this cell without
# regenerating the data would apply normalize_P a second time.
P_tr = normalize_P(P_tr)
P_te = normalize_P(P_te)

# save the dataset (the stored P arrays are the normalized parameters)
np.savez("parametric_committor_train.npz", P=P_tr, Y=Y_tr, Q=Q_tr)
np.savez("parametric_committor_test.npz",  P=P_te, Y=Y_te, Q=Q_te)

Train shapes: (9600, 3) (9600, 1) (9600, 1)
Test  shapes: (2400, 3) (2400, 1) (2400, 1)


In [None]:
class XYDataset(Dataset):
    """Dataset of aligned ``(P, Y, Q)`` rows held as float32 tensors.

    Args:
        P, Y, Q: NumPy arrays with the same number of rows; each is converted
            with ``torch.from_numpy`` and cast to float32.
    """

    def __init__(self, P, Y, Q):
        self.P, self.Y, self.Q = (
            torch.from_numpy(arr).float() for arr in (P, Y, Q)
        )

    def __len__(self):
        # The number of samples is the number of rows in P.
        return self.P.shape[0]

    def __getitem__(self, idx):
        # Return the idx-th row of each tensor as a (P, Y, Q) tuple.
        return tuple(t[idx] for t in (self.P, self.Y, self.Q))

# Mini-batch size used for training.
batch = 512
tr_ds = XYDataset(P_tr, Y_tr, Q_tr)
te_ds = XYDataset(P_te, Y_te, Q_te)
# Training batches are reshuffled every epoch.
tr_loader = DataLoader(tr_ds, batch_size=batch, shuffle=True)
# Evaluation keeps the sample order and uses batches of at least 1024 rows.
te_loader = DataLoader(te_ds, batch_size=max(1024, batch), shuffle=False)

In [None]:
class MLP(nn.Module):
    """Fully connected network stored as ``self.net`` (an ``nn.Sequential``).

    Each entry of ``hidden`` contributes a ``Linear`` layer followed by an
    ``act()`` instance; a final ``Linear`` maps to ``out_dim`` with no
    activation after it.

    Args:
        in_dim: Size of the input features.
        out_dim: Size of the output features.
        hidden: Iterable of hidden layer widths.
        act: Zero-argument callable returning an activation module.
    """

    def __init__(self, in_dim, out_dim, hidden=(128,128), act=nn.Tanh):
        super().__init__()
        widths = [in_dim, *hidden]
        modules = []
        # Consecutive width pairs give the (fan-in, fan-out) of each hidden layer.
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            modules.append(nn.Linear(fan_in, fan_out))
            modules.append(act())
        modules.append(nn.Linear(widths[-1], out_dim))
        self.net = nn.Sequential(*modules)

    def forward(self, x):
        return self.net(x)

class FourierFeatures(nn.Module):
    """Append sine/cosine features of the input to the input itself.

    For an input ``y`` of shape [B, 1] the output columns are
    ``[y, sin(pi*y), cos(pi*y), ..., sin(K*pi*y), cos(K*pi*y)]``,
    giving shape [B, 1 + 2K].

    Args:
        K: Number of frequencies.
    """

    def __init__(self, K=3):
        super().__init__()
        self.K = K
        self.pi = math.pi

    def forward(self, y):  # y in [-1,1], shape [B,1]
        columns = [y]
        for k in range(1, self.K + 1):
            phase = k * self.pi * y
            columns.extend((torch.sin(phase), torch.cos(phase)))
        return torch.cat(columns, dim=1)  # [B, 1+2K]

class DeepONetParametric(nn.Module):
    """Branch/trunk network producing a value in (0, 1) for each (p, y) pair.

    The branch MLP encodes the 3 parameters, the trunk MLP encodes the
    Fourier features of ``y``; both produce ``latent``-dimensional codes. The
    output is ``sigmoid(<branch, trunk> + bias)``.

    Args:
        latent: Size of the branch and trunk codes.
        hidden: Hidden layer widths shared by both MLPs.
        act: Activation factory shared by both MLPs.
        fourier_K: Number of Fourier frequencies applied to ``y``.
    """

    def __init__(self, latent=64, hidden=(128,128), act=nn.Tanh, fourier_K=3):
        super().__init__()
        mlp_kwargs = dict(hidden=hidden, act=act)
        self.branch = MLP(3, latent, **mlp_kwargs)
        self.ff = FourierFeatures(K=fourier_K)
        # The trunk input is y itself plus a sin/cos pair per frequency.
        self.trunk = MLP(2 * fourier_K + 1, latent, **mlp_kwargs)
        self.bias = nn.Parameter(torch.zeros(1))

    def forward(self, p, y):
        branch_code = self.branch(p)
        trunk_code = self.trunk(self.ff(y))
        # Row-wise inner product of the two codes, shape [B, 1].
        logit = (branch_code * trunk_code).sum(dim=1, keepdim=True) + self.bias
        return torch.sigmoid(logit)  # keep q in [0,1]


In [None]:
def rmse(model, loader, device):
    """Root-mean-square error of ``model`` over every element yielded by ``loader``.

    The model is switched to eval mode (and left in it) and gradients are
    disabled during the pass.

    Args:
        model: Callable module invoked as ``model(p, y)``.
        loader: Iterable of ``(p, y, q)`` tensor batches.
        device: Device the batches are moved to before evaluation.

    Returns:
        ``sqrt(sum of squared errors / number of target elements)`` as a float.
    """
    model.eval()
    sq_err_total = 0.0
    n_elements = 0
    with torch.no_grad():
        for batch in loader:
            p, y, q = (t.to(device) for t in batch)
            residual = model(p, y) - q
            sq_err_total += (residual ** 2).sum().item()
            n_elements += q.numel()
    return math.sqrt(sq_err_total / n_elements)

In [None]:
# Train on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
model  = DeepONetParametric(latent=64, hidden=(128,128), fourier_K=3).to(device)
opt    = torch.optim.Adam(model.parameters(), lr=1e-3)

loss_fn = nn.MSELoss()
epochs  = 300
hist = []  # (epoch, train_rmse, test_rmse)

# Lowest test RMSE seen at a logging epoch and a CPU copy of the weights that produced it.
best_te = float('inf')
best_state = None

for ep in range(1, epochs+1):
    model.train()
    for p, y, q in tr_loader:
        p, y, q = p.to(device), y.to(device), q.to(device)
        opt.zero_grad()
        pred = model(p, y)
        data_loss = loss_fn(pred, q)

        # Boundary penalty: take the parameters of the first two samples in the
        # batch (or, for a single-sample batch, that sample's parameters twice).
        if p.shape[0] >= 2:
            p_bc = p[:2]
        else:
            p_bc = p.mean(dim=0, keepdim=True).repeat(2,1)
        # The first parameter row is evaluated at y=-1 and pushed towards 0,
        # the second at y=+1 and pushed towards 1.
        y_bc = torch.tensor([[-1.0],[1.0]], dtype=torch.float32, device=device)
        q_bc = model(p_bc, y_bc)
        bc_loss = (q_bc[0,0] - 0.0)**2 + (q_bc[1,0] - 1.0)**2

        # The boundary term is weighted by 1e-2 relative to the data MSE.
        loss = data_loss + 1e-2 * bc_loss 
        loss.backward()
        opt.step()

    # Evaluate and log on the first epoch, every 10th epoch and the last epoch.
    if ep == 1 or ep % 10 == 0 or ep == epochs:
        tr_rmse = rmse(model, tr_loader, device)
        te_rmse = rmse(model, te_loader, device)
        hist.append((ep, tr_rmse, te_rmse))
        print(f"Epoch {ep:4d} | Train RMSE {tr_rmse:.4e} | Test RMSE {te_rmse:.4e}")

        # Save best checkpoint by test RMSE (requires an improvement of more than 1e-5).
        # Only logging epochs are candidates, since this runs inside the branch above.
        # NOTE(review): the test set is used for model selection here, so any test
        # RMSE reported for the restored checkpoint is optimistically biased;
        # consider a separate validation split.
        if te_rmse < best_te - 1e-5:
            best_te = te_rmse
            best_state = {k: v.detach().cpu().clone() for k,v in model.state_dict().items()}

# Restore best
if best_state is not None:
    model.load_state_dict(best_state)
    print(f"Restored best checkpoint with Test RMSE = {best_te:.4e}")

Epoch    1 | Train RMSE 9.8968e-02 | Test RMSE 1.0930e-01
Epoch   10 | Train RMSE 3.7777e-02 | Test RMSE 4.5389e-02
Epoch   20 | Train RMSE 1.6411e-02 | Test RMSE 2.2539e-02
Epoch   30 | Train RMSE 1.3128e-02 | Test RMSE 1.9287e-02
Epoch   40 | Train RMSE 1.2110e-02 | Test RMSE 1.7171e-02
Epoch   50 | Train RMSE 1.1426e-02 | Test RMSE 1.5248e-02
Epoch   60 | Train RMSE 1.2743e-02 | Test RMSE 1.6300e-02
Epoch   70 | Train RMSE 1.2547e-02 | Test RMSE 1.5258e-02
Epoch   80 | Train RMSE 1.0189e-02 | Test RMSE 1.3274e-02
Epoch   90 | Train RMSE 8.3074e-03 | Test RMSE 1.1819e-02
Epoch  100 | Train RMSE 8.4933e-03 | Test RMSE 1.2366e-02
Epoch  110 | Train RMSE 8.1789e-03 | Test RMSE 1.1329e-02
Epoch  120 | Train RMSE 6.6667e-03 | Test RMSE 1.0737e-02
Epoch  130 | Train RMSE 9.8078e-03 | Test RMSE 1.2274e-02
Epoch  140 | Train RMSE 6.6871e-03 | Test RMSE 1.0638e-02
Epoch  150 | Train RMSE 7.9545e-03 | Test RMSE 1.0843e-02
Epoch  160 | Train RMSE 5.5922e-03 | Test RMSE 9.0574e-03
Epoch  170 | T

In [None]:
# Persist the current model weights.
torch.save(model.state_dict(), "deeponet_parametric.pt")
# One row per logging epoch recorded in `hist`.
metrics_df = pd.DataFrame(hist, columns=["epoch", "train_rmse", "test_rmse"])
metrics_df.to_csv("metrics.csv", index=False)
metrics_df

Unnamed: 0,epoch,train_rmse,test_rmse
0,1,0.098968,0.109299
1,10,0.037777,0.045389
2,20,0.016411,0.022539
3,30,0.013128,0.019287
4,40,0.01211,0.017171
5,50,0.011426,0.015248
6,60,0.012743,0.0163
7,70,0.012547,0.015258
8,80,0.010189,0.013274
9,90,0.008307,0.011819


In [None]:
# Pick first test sample's parameters (already normalized)
p0n, p1n, p2n = P_te[0, 0], P_te[0, 1], P_te[0, 2]

# Invert the affine maps applied by normalize_P to recover the raw parameters.
p0 = p0n * 5.0 + 0.0
p1 = p1n * 1.0 + (-0.5)
p2 = p2n * 0.45 + 0.25

# Reference solution on a uniform 201-point line over [-1, 1], computed with
# the same solver arguments and 1001-point grid that make_dataset uses.
y_line = np.linspace(-1, 1, 201).astype(np.float32)
x_f = np.linspace(-1.0, 1.0, 1001)
f_x = Vprime_param(x_f, float(p0), float(p1), float(p2))
q_true = comm.solve1Dcommittor(256, 1.0, 0.0, f_x, x_f, 1.0, y_line)

# Model prediction on the same line; the network takes the normalized
# parameters, repeated once per query point.
with torch.no_grad():
    P_rep = torch.tensor(np.tile([p0n,p1n,p2n], (y_line.size,1)), dtype=torch.float32).to(device)
    Y_rep = torch.tensor(y_line[:,None], dtype=torch.float32).to(device)
    q_pred = model(P_rep, Y_rep).cpu().numpy().squeeze()

# Show the first rows of reference vs. prediction side by side.
pd.DataFrame({"y": y_line, "q_true": q_true, "q_pred": q_pred}).head()

Unnamed: 0,y,q_true,q_pred
0,-1.0,-2.97852e-15,0.009882
1,-0.99,0.00246343,0.010666
2,-0.98,0.004929374,0.011563
3,-0.97,0.007399752,0.012584
4,-0.96,0.009876449,0.013739


In [None]:
# RMSE of the current model weights on the full training and test sets.
deeponet_train_rmse = rmse(model, tr_loader, device)
deeponet_test_rmse = rmse(model, te_loader, device)
deeponet_metrics = {
    "model": "DeepONet",
    "dataset": "parametric",
    "train_rmse": float(deeponet_train_rmse),
    "test_rmse": float(deeponet_test_rmse),
}
# Write the summary as JSON and display it as the cell output.
with open("deeponet_results.json", "w") as f:
    json.dump(deeponet_metrics, f, indent=2)
deeponet_metrics

{'model': 'DeepONet',
 'dataset': 'parametric',
 'train_rmse': 0.003919191968680557,
 'test_rmse': 0.006677686448951258}