In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.autograd as autograd
import math, time

torch.set_default_dtype(torch.float64)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------- Problem definition ----------
pi = math.pi

def u_exact_np(X):
    return np.sin(2*pi*X[:,0]) * np.sin(2*pi*X[:,1])

def f_forcing_np(X):
    return (2*pi)**2 * 2.0 * u_exact_np(X)

def u_exact_torch(x):
    return torch.sin(2*pi*x[:,0]) * torch.sin(2*pi*x[:,1])

def f_forcing_torch(x):
    return (2*pi)**2 * 2.0 * torch.sin(2*pi*x[:,0]) * torch.sin(2*pi*x[:,1])

# ---------- Sampling ----------
def sample_grid_in_box(n_per_dim=50):
    xs = np.linspace(-1.0, 1.0, n_per_dim, dtype=np.float64)
    X, Y = np.meshgrid(xs, xs)
    pts = np.stack([X.ravel(), Y.ravel()], axis=-1)
    return pts

def sample_boundary_points(n_side=50):
    s = np.linspace(-1.0, 1.0, n_side, dtype=np.float64)
    pts = []
    for y in s: pts.append([-1.0, y])
    for y in s: pts.append([1.0, y])
    for x in s: pts.append([x, -1.0])
    for x in s: pts.append([x,  1.0])
    return np.array(pts)

# ---------- TransNet helpers ----------
def sigma(s):
    return np.tanh(s)
def sigma_dd(s):
    t = np.tanh(s)
    sech2 = 1 - t*t
    return -2.0 * t * sech2

def sample_a_r(M, d=2, R=1.5, seed=1234):
    rng = np.random.RandomState(seed)
    A = rng.randn(M, d)
    norms = np.linalg.norm(A, axis=1, keepdims=True)
    A /= norms
    r = rng.rand(M) * R
    return A, r

def build_matrices(X_int, X_bd, A, rvec, gamma):
    Xc_int = X_int
    Xc_bd = X_bd
    s_int = Xc_int.dot(A.T) + rvec.reshape(1,-1)
    s_bd = Xc_bd.dot(A.T) + rvec.reshape(1,-1)
    S_int = gamma * s_int
    S_bd = gamma * s_bd
    Psi_int = sigma(S_int)
    Psi_bd = sigma(S_bd)
    Psi_dd_int = (gamma**2) * sigma_dd(S_int)
    F_int = -Psi_dd_int
    return F_int, Psi_bd, Psi_int, s_int, s_bd

def solve_alpha_ls(F_int, Psi_bd, f_int, g_bd, lambda_L=1.0, lambda_B=1.0, reg=1e-8):
    wL = math.sqrt(lambda_L)
    wB = math.sqrt(lambda_B)
    A_big = np.vstack([wL*F_int, wB*Psi_bd])
    rhs = np.concatenate([wL*f_int, wB*g_bd])
    AtA = A_big.T @ A_big + reg*np.eye(A_big.shape[1])
    Atb = A_big.T @ rhs
    alpha = np.linalg.solve(AtA, Atb)
    return alpha

def u_base_numpy(X, A, rvec, gamma, alpha):
    s = X.dot(A.T) + rvec.reshape(1,-1)
    S = gamma*s
    Psi = sigma(S)
    return Psi.dot(alpha)

# ========== Step 1: TransNet Baseline ==========
M = 300
grid_n = 50
bd_side = 50
X_int = sample_grid_in_box(grid_n)
X_bd = sample_boundary_points(bd_side)
f_int = f_forcing_np(X_int)
g_bd = u_exact_np(X_bd)

A, rvec = sample_a_r(M)

# Golden search for gamma
def eta_of_gamma(gamma):
    F_int, Psi_bd, _, _, _ = build_matrices(X_int, X_bd, A, rvec, gamma)
    alpha = solve_alpha_ls(F_int, Psi_bd, f_int, g_bd)
    resid_int = F_int @ alpha - f_int
    resid_bd = Psi_bd @ alpha - g_bd
    mse_int = np.mean(resid_int**2)
    mse_bd = np.mean(resid_bd**2)
    return mse_int + mse_bd, mse_int, mse_bd       #这里后面两个mse是在每给定一组alpha后，计算的pde残差和边界残差

def golden_search(func, a, b, tol=1e-3, max_iters=50):
    phi = (1 + 5**0.5)/2
    invphi = 1/phi
    c = b - invphi*(b-a)
    d = a + invphi*(b-a)
    fc = func(c)
    fd = func(d)
    for _ in range(max_iters):
        if (b - a) < tol: break
        if fc[0] < fd[0]:
            b = d
            d = c
            fd = fc
            c = b - invphi*(b - a)
            fc = func(c)
        else:
            a = c
            c = d
            fc = fd
            d = a + invphi*(b - a)
            fd = func(d)
    return (c, fc) if fc[0] < fd[0] else (d, fd)

gamma_opt, best = golden_search(eta_of_gamma, 1e-2, 10.0)
alpha_opt = solve_alpha_ls(*build_matrices(X_int, X_bd, A, rvec, gamma_opt)[:2], f_int, g_bd)

print(f"Baseline gamma_opt={gamma_opt:.6f}, MSE_int={best[1]:.3e}, MSE_bd={best[2]:.3e}")

# Evaluate baseline error
X_test = sample_grid_in_box(100)
u_pred_base = u_base_numpy(X_test, A, rvec, gamma_opt, alpha_opt)
mse_base = np.mean((u_pred_base - u_exact_np(X_test))**2)
print(f"[Baseline TransNet] Interior MSE vs exact: {mse_base:.6e}")

Baseline gamma_opt=1.641822, MSE_int=3.061e-05, MSE_bd=7.161e-06
[Baseline TransNet] Interior MSE vs exact: 8.175459e-07


In [1]:
# ============================================
# Hybrid TransNet-NN after gamma_opt found
# Based on the correct TransNet baseline structure
# ============================================

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import math, time

torch.set_default_dtype(torch.float64)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# ---------- Problem definition ----------
pi = math.pi

def u_exact_np(X):
    return np.sin(2*pi*X[:,0]) * np.sin(2*pi*X[:,1])

def f_forcing_np(X):
    return (2*pi)**2 * 2.0 * u_exact_np(X)

def u_exact_torch(x):
    return torch.sin(2*pi*x[:,0]) * torch.sin(2*pi*x[:,1])

def f_forcing_torch(x):
    return (2*pi)**2 * 2.0 * torch.sin(2*pi*x[:,0]) * torch.sin(2*pi*x[:,1])

# ---------- Sampling ----------
def sample_grid_in_box(n_per_dim=50):
    xs = np.linspace(-1.0, 1.0, n_per_dim, dtype=np.float64)
    X, Y = np.meshgrid(xs, xs)
    pts = np.stack([X.ravel(), Y.ravel()], axis=-1)
    return pts

def sample_boundary_points(n_side=50):
    s = np.linspace(-1.0, 1.0, n_side, dtype=np.float64)
    pts = []
    for y in s: pts.append([-1.0, y])
    for y in s: pts.append([1.0, y])
    for x in s: pts.append([x, -1.0])
    for x in s: pts.append([x,  1.0])
    return np.array(pts)

# ---------- TransNet core ----------
def sigma(s):
    return np.tanh(s)
def sigma_dd(s):
    t = np.tanh(s)
    sech2 = 1 - t*t
    return -2.0 * t * sech2

def sample_a_r(M, d=2, R=1.5, seed=1234):
    rng = np.random.RandomState(seed)
    A = rng.randn(M, d)
    norms = np.linalg.norm(A, axis=1, keepdims=True)
    A /= norms
    r = rng.rand(M) * R
    return A, r

def build_matrices(X_int, X_bd, A, rvec, gamma):
    Xc_int = X_int
    Xc_bd = X_bd
    s_int = Xc_int.dot(A.T) + rvec.reshape(1,-1)
    s_bd = Xc_bd.dot(A.T) + rvec.reshape(1,-1)
    S_int = gamma * s_int
    S_bd = gamma * s_bd
    Psi_int = sigma(S_int)
    Psi_bd = sigma(S_bd)
    Psi_dd_int = (gamma**2) * sigma_dd(S_int)
    F_int = -Psi_dd_int
    return F_int, Psi_bd, Psi_int, s_int, s_bd

def solve_alpha_ls(F_int, Psi_bd, f_int, g_bd, lambda_L=1.0, lambda_B=1.0, reg=1e-8):
    wL = math.sqrt(lambda_L)
    wB = math.sqrt(lambda_B)
    A_big = np.vstack([wL*F_int, wB*Psi_bd])
    rhs = np.concatenate([wL*f_int, wB*g_bd])
    AtA = A_big.T @ A_big + reg*np.eye(A_big.shape[1])
    Atb = A_big.T @ rhs
    alpha = np.linalg.solve(AtA, Atb)
    return alpha

def u_base_numpy(X, A, rvec, gamma, alpha):
    s = X.dot(A.T) + rvec.reshape(1,-1)
    S = gamma*s
    Psi = sigma(S)
    return Psi.dot(alpha)

# ========== Step 1: Use the known gamma_opt ==========
gamma_opt = 1.641822   # <-- use the value found in the first script
M = 300
grid_n = 50
bd_side = 50

X_int = sample_grid_in_box(grid_n)
X_bd = sample_boundary_points(bd_side)
f_int = f_forcing_np(X_int)
g_bd = u_exact_np(X_bd)
A, rvec = sample_a_r(M)

# ========== Step 2: Baseline TransNet ==========
F_int, Psi_bd, _, _, _ = build_matrices(X_int, X_bd, A, rvec, gamma_opt)
alpha_opt = solve_alpha_ls(F_int, Psi_bd, f_int, g_bd)
X_test = sample_grid_in_box(100)
u_pred_base = u_base_numpy(X_test, A, rvec, gamma_opt, alpha_opt)
mse_base = np.mean((u_pred_base - u_exact_np(X_test))**2)
print(f"[Baseline TransNet] gamma_opt={gamma_opt:.6f}, Interior MSE={mse_base:.6e}")

# ========== Step 3: Hybrid TransNet + MLP correction ==========
class HybridModel(nn.Module):
    def __init__(self, A_np, r_np, gamma, hidden=64):
        super().__init__()
        self.A = torch.tensor(A_np, dtype=torch.float64)
        self.r = torch.tensor(r_np, dtype=torch.float64)
        self.gamma = gamma
        M = self.A.shape[0]
        self.mlp = nn.Sequential(
            nn.Linear(M, hidden),
            nn.Tanh(),
            nn.Linear(hidden, 1)
        )
    def forward(self, X):
        s = X @ self.A.T + self.r
        Z = torch.tanh(self.gamma * s)
        return self.mlp(Z).squeeze(-1)

def pde_loss(model, X_int, X_bd, f_int, g_bd, lamL=1.0, lamB=1.0):
    X_int.requires_grad_(True)
    u_pred = model(X_int)
    grads = torch.autograd.grad(u_pred.sum(), X_int, create_graph=True)[0]
    dudx, dudy = grads[:,0], grads[:,1]
    d2udx2 = torch.autograd.grad(dudx.sum(), X_int, create_graph=True)[0][:,0]
    d2udy2 = torch.autograd.grad(dudy.sum(), X_int, create_graph=True)[0][:,1]
    lap_u = d2udx2 + d2udy2
    res_int = -lap_u - f_int
    loss_int = torch.mean(res_int**2)
    u_bd_pred = model(X_bd)
    res_bd = u_bd_pred - g_bd
    loss_bd = torch.mean(res_bd**2)
    return lamL*loss_int + lamB*loss_bd, loss_int, loss_bd

# prepare torch tensors
X_int_t = torch.tensor(X_int, dtype=torch.float64).to(device)
X_bd_t = torch.tensor(X_bd, dtype=torch.float64).to(device)
f_int_t = torch.tensor(f_int, dtype=torch.float64).to(device)
g_bd_t = torch.tensor(g_bd, dtype=torch.float64).to(device)

# train hybrid
model = HybridModel(A, rvec, gamma_opt, hidden=64).to(device)
opt = optim.Adam(model.parameters(), lr=1e-3)
epochs = 2000
print("Start training Hybrid model...")
for ep in range(1, epochs+1):
    opt.zero_grad()
    loss, li, lb = pde_loss(model, X_int_t, X_bd_t, f_int_t, g_bd_t)
    loss.backward()
    opt.step()
    if ep % 200 == 0 or ep == 1:
        print(f"Epoch {ep:4d}: total={loss.item():.3e}, PDE={li.item():.3e}, BD={lb.item():.3e}")

# evaluate
model.eval()
with torch.no_grad():
    X_test_t = torch.tensor(X_test, dtype=torch.float64).to(device)
    u_pred_hybrid = model(X_test_t).cpu().numpy()
mse_hybrid = np.mean((u_pred_hybrid - u_exact_np(X_test))**2)

print("\n========== Summary ==========")
print(f"gamma_opt      = {gamma_opt:.6f}")
print(f"Baseline MSE   = {mse_base:.6e}")
print(f"Hybrid MSE     = {mse_hybrid:.6e}")


[Baseline TransNet] gamma_opt=1.641822, Interior MSE=8.181998e-07
Start training Hybrid model...
Epoch    1: total=1.495e+03, PDE=1.495e+03, BD=1.222e-02
Epoch  200: total=9.064e+00, PDE=5.048e+00, BD=4.016e+00
Epoch  400: total=4.315e+00, PDE=1.571e+00, BD=2.743e+00
Epoch  600: total=2.646e+00, PDE=9.002e-01, BD=1.746e+00
Epoch  800: total=1.736e+00, PDE=6.220e-01, BD=1.114e+00
Epoch 1000: total=1.144e+00, PDE=4.349e-01, BD=7.088e-01
Epoch 1200: total=7.956e-01, PDE=3.417e-01, BD=4.539e-01
Epoch 1400: total=5.796e-01, PDE=2.858e-01, BD=2.938e-01
Epoch 1600: total=4.195e-01, PDE=2.243e-01, BD=1.952e-01
Epoch 1800: total=3.258e-01, PDE=1.901e-01, BD=1.357e-01
Epoch 2000: total=4.538e-01, PDE=3.546e-01, BD=9.922e-02

gamma_opt      = 1.641822
Baseline MSE   = 8.181998e-07
Hybrid MSE     = 2.914959e-02
