In [1]:
%load_ext autoreload
%autoreload 2 
%reload_ext autoreload
%matplotlib inline
import matplotlib.pyplot as plt

# always import gbm_algos first !
import xgboost, lightgbm, catboost
from gplearn.genetic import SymbolicRegressor

# To access the contents of the parent dir
import sys; sys.path.insert(0, '../')
import os
from scipy.io import loadmat
from utils import *
from preprocess import *
from models import RobustPCANN

# Let's do facy optimizers
from optimizers import Lookahead, AdamGC, SGDGC
from madgrad import MADGRAD
from lbfgsnew import LBFGSNew

from pytorch_robust_pca import *

# Modify at /usr/local/lib/python3.9/site-packages/torch_lr_finder/lr_finder.py
from torch_lr_finder import LRFinder

# Tracking
from tqdm import trange

# Symbolics
import sympy
import sympytorch

# BayesianOptimization
from bayes_opt import BayesianOptimization
from skopt import Optimizer

# hyperopt
from hyperopt import hp, fmin, tpe

Running Python 3.9.7
You can use npar for np.array


In [2]:
DATA_PATH = "../experimental_data/burgers_shock.mat"
data = loadmat(DATA_PATH)

t = data['t'].flatten()[:,None]
x = data['x'].flatten()[:,None]
Exact = np.real(data['usol']).T

# Adding noise
noise_intensity = 0.0
noisy_xt = False

if noise_intensity>0.0:
    Exact = perturb(Exact, intensity=noise_intensity, noise_type="normal")
    print("Perturbed Exact with intensity =", float(noise_intensity))
else: print("Clean Exact")

X, T = np.meshgrid(x,t)

X_star = np.hstack((X.flatten()[:,None], T.flatten()[:,None]))
u_star = Exact.flatten()[:,None]

if noisy_xt and noise_intensity>0.0:
    print("Noisy (x, t)")
    X_star = perturb(X_star, intensity=noise_intensity, noise_type="normal")
else: print("Clean (x, t)")

# Doman bounds
lb = X_star.min(0)
ub = X_star.max(0)

N = 2000
print(f"Training with {N} samples")
idx = np.random.choice(X_star.shape[0], N, replace=False)
X_u_train = X_star[idx, :]
u_train = u_star[idx,:]

# Convert to torch.tensor
X_u_train = to_tensor(X_u_train, True)
u_train = to_tensor(u_train, False)

scaling_factor = 1.0
lb = scaling_factor*to_tensor(lb, False)
ub = scaling_factor*to_tensor(ub, False)

# Feature names, base on the symbolic regression results
feature_names = ('uf', 'u_x', 'u_xx'); feature2index = {}

Clean Exact
Clean (x, t)
Training with 10000 samples


In [3]:
# Noiseless program
program = '''
-0.970158*uf*u_x+0.003090*u_xx
'''
pde_expr, variables = build_exp(program); print(pde_expr, variables)
mod = sympytorch.SymPyModule(expressions=[pde_expr]); mod.train()

-0.970158*u_x*uf + 0.00309*u_xx {u_x, uf, u_xx}


SymPyModule(expressions=(-0.970158*u_x*uf + 0.00309*u_xx,))

In [4]:
list(mod.parameters())[0], list(mod.parameters())[1]

(Parameter containing:
 tensor(0.0031, requires_grad=True),
 Parameter containing:
 tensor(-0.9702, requires_grad=True))

In [5]:
class RobustPINN(nn.Module):
    def __init__(self, model, loss_fn, index2features, scale=False, lb=None, ub=None, pretrained=False, noiseless_mode=True, init_cs=(0.5, 0.5), init_betas=(0.0, 0.0)):
        super(RobustPINN, self).__init__()
        self.model = model
        if not pretrained: self.model.apply(self.xavier_init)
        
        self.noiseless_mode = noiseless_mode
        self.in_fft_nn = None; self.out_fft_nn = None
        self.inp_rpca = None; self.out_rpca = None
        if not self.noiseless_mode:
            # FFTNN
            self.in_fft_nn = FFTTh(c=init_cs[0])
            self.out_fft_nn = FFTTh(c=init_cs[1])

            # Robust Beta-PCA
            self.inp_rpca = RobustPCANN(beta=0.0, is_beta_trainable=True, inp_dims=2, hidden_dims=32)
            self.out_rpca = RobustPCANN(beta=0.0, is_beta_trainable=True, inp_dims=1, hidden_dims=32)
        
        self.p0 = torch.log(list(loss_fn.parameters())[0])
        self.p1 = list(loss_fn.parameters())[1]
        
        self.index2features = index2features; self.feature2index = {}
        for idx, fn in enumerate(self.index2features): self.feature2index[fn] = str(idx)
        self.scale = scale; self.lb, self.ub = lb, ub
        self.diff_flag = diff_flag(self.index2features)
        
    def xavier_init(self, m):
        if type(m) == nn.Linear:
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)
        
    def forward(self, x, t):
        H = torch.cat([x, t], dim=1)
        if self.scale: H = self.neural_net_scale(H)
        return self.model(H)
    
    def loss(self, X_input, X_input_noise, y_input, y_input_noise, exp_p0=True, update_network_params=True, update_pde_params=True):
        # Denoising process
        if not self.noiseless_mode:
            # (1) Denoising FFT on (x, t)
            # This line returns the approx. recon.
            X_input_noise = cat(torch.fft.ifft(self.in_fft_nn(X_input_noise[1])*X_input_noise[0]).real.reshape(-1, 1), 
                                torch.fft.ifft(self.in_fft_nn(X_input_noise[3])*X_input_noise[2]).real.reshape(-1, 1))
            X_input_noise = X_input-X_input_noise
            X_input = self.inp_rpca(X_input, X_input_noise, normalize=True)
            
            # (2)D enoising FFT on y_input
            y_input_noise = y_input-torch.fft.ifft(self.out_fft_nn(y_input_noise[1])*y_input_noise[0]).real.reshape(-1, 1)
            y_input = self.out_rpca(y_input, y_input_noise, normalize=True)
        
        grads_dict, u_t = self.grads_dict(X_input[:, 0:1], X_input[:, 1:2])
        
        total_loss = []
        # MSE Loss
        if update_network_params:
            mse_loss = F.mse_loss(grads_dict["uf"], y_input)
            total_loss.append(mse_loss)
            
        # PDE Loss
        if update_pde_params:
            if exp_p0: p0_coeff = torch.exp(self.p0)
            else: p0_coeff = self.p0
            l_eq = F.mse_loss(p0_coeff*grads_dict["u_xx"]+self.p1*grads_dict["uf"]*grads_dict["u_x"], u_t)
            total_loss.append(l_eq)
            
        return total_loss
    
    def grads_dict(self, x, t):
        uf = self.forward(x, t)
        u_t = self.gradients(uf, t)[0]
        u_x = self.gradients(uf, x)[0]
        u_xx = self.gradients(u_x, x)[0]
        
        return {"uf":uf, "u_x":u_x, "u_xx":u_xx}, u_t
    
    def gradients(self, func, x):
        return grad(func, x, create_graph=True, retain_graph=True, grad_outputs=torch.ones(func.shape))
    
    def neural_net_scale(self, inp): 
        return -1.0+2.0*(inp-self.lb)/(self.ub-self.lb)

In [6]:
model = TorchMLP(dimensions=[2, 50, 50, 50 ,50, 50, 1], 
         activation_function=nn.Tanh, bn=None, # nn.LayerNorm
         dropout=None)

### TODO: How to load weights without using bn ###

# Pretrained model
semisup_model_state_dict = torch.load("./weights_nobn/semisup_model_nobn_2000_2000_finetuned.pth")
parameters = OrderedDict()
# Filter only the parts that I care about renaming (to be similar to what defined in TorchMLP).
inner_part = "network.model."
for p in semisup_model_state_dict:
    if inner_part in p:
        parameters[p.replace(inner_part, "")] = semisup_model_state_dict[p]

model.load_state_dict(parameters)

Using old implementation of TorchMLP. See models.py for more new model-related source code.


<All keys matched successfully>

In [7]:
F.mse_loss(model(X_u_train), u_train)

tensor(1.8194e-05, grad_fn=<MseLossBackward0>)

In [8]:
NOISELESS_MODE = True

In [9]:
_, x_fft, x_PSD = fft1d_denoise(X_u_train[:, 0:1], c=-5, return_real=True)
_, t_fft, t_PSD = fft1d_denoise(X_u_train[:, 1:2], c=-5, return_real=True)
_, u_train_fft, u_train_PSD = fft1d_denoise(u_train, c=-5, return_real=True)

In [10]:
def closure():
    global NOISELESS_MODE
    if torch.is_grad_enabled():
        optimizer2.zero_grad()
    losses = pinn.loss(X_u_train, (x_fft, x_PSD, t_fft, t_PSD), u_train, (u_train_fft, u_train_PSD), update_network_params=True, update_pde_params=True)
    l = sum(losses)
    if l.requires_grad:
        l.backward(retain_graph=True)
    return l

def mtl_closure():
    global NOISELESS_MODE
    losses = pinn.loss(X_u_train, (x_fft, x_PSD, t_fft, t_PSD), u_train, (u_train_fft, u_train_PSD), update_network_params=True, update_pde_params=True)
    updated_grads = []
    
    for i in range(len(losses)):
        optimizer1.zero_grad()
        losses[i].backward(retain_graph=True)

        g_task = []
        for param in pinn.parameters():
            if param.grad is not None:
                g_task.append(Variable(param.grad.clone(), requires_grad=False))
            else:
                g_task.append(Variable(torch.zeros(param.shape), requires_grad=False))
        # appending the gradients from each task
        updated_grads.append(g_task)

    updated_grads = list(pcgrad.pc_grad_update(updated_grads))[0]
    for idx, param in enumerate(pinn.parameters()): 
        param.grad = updated_grads[0][idx]+updated_grads[1][idx]
        
    return sum(losses)

In [11]:
if not NOISELESS_MODE:
    pinn = RobustPINN(model=model, loss_fn=mod, index2features=feature_names, 
                      scale=False, lb=None, ub=None, pretrained=True, noiseless_mode=False)

    def inference(args):
        global pinn
        c1, c2 = args
        pinn.in_fft_nn.c = nn.Parameter(data=torch.FloatTensor([float(c1)]), requires_grad=False)
        pinn.out_fft_nn.c = nn.Parameter(data=torch.FloatTensor([float(c2)]), requires_grad=False)
        losses = pinn.loss(X_u_train, (x_fft, x_PSD, t_fft, t_PSD), u_train, (u_train_fft, u_train_PSD), update_network_params=True, update_pde_params=True)
        return sum(losses).item()

    pinn.eval()
    space = [hp.uniform('c1', 0, 1), hp.uniform('c2', 0, 1)]
    res = fmin(fn=inference, space=space, algo=tpe.suggest, max_evals=200)

    print(res)
    if 'pinn' in globals(): del pinn

    pinn = RobustPINN(model=model, loss_fn=mod, index2features=feature_names, 
                      scale=False, lb=None, ub=None, pretrained=True, noiseless_mode=False,
                      init_cs=(res['c1'], res['c2']))
    
else: 
    pinn = RobustPINN(model=model, loss_fn=mod, index2features=feature_names, 
                      scale=False, lb=None, ub=None, pretrained=True, noiseless_mode=True)
    print("You are in noiseless mode.")

You are in noiseless mode.


In [12]:
epochs1, epochs2 = 10000, 50

In [13]:
# optimizer1 = MADGRAD(pinn.parameters(), lr=1e-7, momentum=0.95)
optimizer1 = AdamGC(pinn.parameters(), lr=6e-4, use_gc=True, gc_conv_only=False, gc_loc=False)
pinn.train(); best_train_loss = 1e6

print('1st Phase optimization using Adam with PCGrad gradient modification')
for i in range(epochs1):
    optimizer1.step(mtl_closure)
    if (i % 1000) == 0 or i == epochs1-1:
        l = mtl_closure()
        print("Epoch {}: ".format(i), l.item())

1st Phase optimization using Adam with PCGrad gradient modification
Epoch 0:  0.7813979387283325
Epoch 1000:  0.003025431651622057
Epoch 2000:  0.008694127202033997
Epoch 3000:  0.0020061233080923557
Epoch 4000:  0.0011806694092229009
Epoch 5000:  0.0004306812770664692
Epoch 6000:  0.00037557032192125916
Epoch 7000:  0.0007710368372499943
Epoch 8000:  0.00026818731566891074
Epoch 9000:  0.0005447733565233648
Epoch 9999:  0.00022808564244769514


In [14]:
optimizer2 = torch.optim.LBFGS(pinn.parameters(), lr=1e-1, max_iter=500, max_eval=int(500*1.25), history_size=500, line_search_fn='strong_wolfe')
print('2nd Phase optimization using LBFGS')
for i in range(epochs2):
    optimizer2.step(closure)
    if (i % 10) == 0 or i == epochs2-1:
        l = closure()
        print("Epoch {}: ".format(i), l.item())

2nd Phase optimization using LBFGS
Epoch 0:  6.383058644132689e-05
Epoch 10:  1.3216545994509943e-05
Epoch 20:  1.3216545994509943e-05
Epoch 30:  1.3216545994509943e-05
Epoch 40:  1.3216545994509943e-05
Epoch 49:  1.3216545994509943e-05


In [15]:
float(pinn.p1.detach().numpy()), float(np.exp(pinn.p0.detach().numpy()))

(-0.9977256059646606, 0.0030899993143975735)

In [19]:
const = 0.01/np.pi
errs = 100*npar([(1-0.998), (const-0.0031)/const])
errs.mean(), errs.std()

(1.4053138693582128, 1.2053138693582126)

In [20]:
# save(pinn, "pinn.pth")