### Notes
### My choice: [Adam, PCGrad] -> LBFGS (90 percent -> 10 percent of the training budget) => Have to show that this is better than an Adam-only or an LBFGS-only approach. Please also try the "uncert" weighting as well.
### Reporting the average performance over several runs (approx. 5) is feasible in the context of the inverse problem.

In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable, grad

import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from mpl_toolkits.axes_grid1 import make_axes_locatable
import scipy
import scipy.io as io
from pyDOE import lhs

from sklearn.metrics import mean_squared_error, mean_absolute_error

from pysr import pysr, best, best_callable

### For automatic PDE recovery ###
from gplearn.genetic import SymbolicRegressor
import sympy
from sympy import *
from sympy.parsing.sympy_parser import parse_expr
from sympy.core import evaluate
from utils import *

# Multi-task learning loss, PCGrad
from pcgrad import PCGrad
from pcgradpytorch import pcgrad, lbfgsnew

In [17]:
# Load the reference solution stored in the Burgers shock .mat file.
data = io.loadmat('data/burgers_shock.mat')

# Coordinates as column vectors; the solution is transposed so that its rows
# line up with the rows of the meshgrid built from (x, t) below.
t = data['t'].reshape(-1, 1)
x = data['x'].reshape(-1, 1)
Exact = np.real(data['usol']).T

X, T = np.meshgrid(x, t)

# One row per grid point: column 0 holds x, column 1 holds t.
X_star = np.column_stack((X.ravel(), T.ravel()))
u_star = Exact.reshape(-1, 1)

# Domain bounds
lb = X_star.min(axis=0)
ub = X_star.max(axis=0)

# Draw N distinct grid points (no replacement) as the training set.
N = 2000
idx = np.random.choice(X_star.shape[0], N, replace=False)
X_u_train = X_star[idx]
u_train = u_star[idx]

In [18]:
# Which symbolic-regression backend produced the candidate PDE expression.
choice = 'pysr'

# NOTE(review): pickle_load comes from utils and presumably unpickles the file;
# only load pickles from trusted sources.
if choice == 'gplearn':
    ### Using a gplearn model ###
    program = pickle_load('./data/gp_symreg.pkl')._program
elif choice == 'pysr':
    ### Using a pysr model ###
    program = best(pickle_load('./saved_path_inverse_burger/equations_from_pysr.pkl'))
else:
    # Fail loudly: otherwise `program` is either undefined or, in a notebook,
    # silently left over from a previous run of this cell.
    raise ValueError("Unknown symbolic regression backend: {!r}".format(choice))

# NOTE: `vars` shadows the builtin of the same name; kept because it is a
# notebook-level name that other cells may read.
expr, vars = build_exp(program)
# Replace the exact Integer -1 by a double-precision Float -1.0.
expr = expr.subs(Integer(-1), Float(-1.0, precision=53))
# expr = manipulate_expr(expr)
print(expr)

Loaded from ./saved_path_inverse_burger/equations_from_pysr.pkl
-0.6860516*x0*x1 + 0.0020575821*x2


In [19]:
# Every numeric atom of the expression becomes one learnable coefficient.
constants = list(expr.atoms(Number))

# Substitute the magnitude of each constant by a symbol C<i>; any sign stays
# in the expression itself (e.g. (-C1)*x0*x1).
with evaluate(False):
    for i, const in enumerate(constants):
        var = Symbol('C' + str(i))
        expr = expr.subs(abs(const), var)
variables = list(expr.atoms(Symbol))

# Coefficients smaller than 0.005 are stored as their logarithm; their indices
# are recorded so that torch.exp can be applied to them when they are used.
exp_indices = []
for i, const in enumerate(constants):
    magnitude = abs(const)
    constants[i] = magnitude
    if magnitude < 0.005:
        constants[i] = log(magnitude)
        exp_indices.append(i)

pde_params = nn.Parameter(data=torch.tensor(constants, dtype=torch.float32))

In [20]:
### Sorting Symbols ###
# Order the symbols by the integer key that string2int assigns to their names.
values = [string2int(str(sym)) for sym in variables]
variables = [sym for _, sym in sorted(zip(values, variables))]

variables

[C0, C1, x0, x1, x2]

In [21]:
# Turn the symbolic expression into a callable whose positional arguments
# follow the sorted order of `variables`.
fn = lambdify(tuple(variables), expr)

print('-- loss_fn --', expr, sep='\n')
print('-- pde_params --', pde_params, sep='\n')

-- loss_fn --
C0*x2 + (-C1)*x0*x1
-- pde_params --
Parameter containing:
tensor([-6.1862,  0.6861], requires_grad=True)


In [22]:
class Network(nn.Module):
    """Network that fits data while learning the coefficients of a candidate PDE.

    Args:
        model: Backbone module; it is called on ``torch.cat([x, t], dim=1)``.
        pde_parameters: 1-D ``nn.Parameter`` holding the learnable PDE
            coefficients. Entries whose index is in ``exp_indices`` are
            exponentiated before use (i.e. they are stored in log-space).
        loss_fn: Callable invoked as ``loss_fn(*coefficients, u, u_x, u_xx)``;
            its output is compared against ``u_t``.
        exp_indices: Indices of ``pde_parameters`` to pass through ``torch.exp``.
    """

    def __init__(self, model, pde_parameters, loss_fn, exp_indices):
        super().__init__()
        self.model = model
        self.model.apply(self.xavier_init)
        self.callable_loss_fn = loss_fn
        self.pde_parameters = pde_parameters
        self.exp_indices = exp_indices

    def xavier_init(self, m):
        """Xavier-initialise the weight of a linear layer and set its bias to 0.01."""
        if isinstance(m, nn.Linear):
            torch.nn.init.xavier_uniform_(m.weight)
            # Layers created with bias=False have no bias tensor to fill.
            if m.bias is not None:
                m.bias.data.fill_(0.01)

    def forward(self, x, t):
        """Evaluate the backbone on ``x`` and ``t`` concatenated along dim 1."""
        return self.model(torch.cat([x, t], dim=1))

    def loss(self, x, t, y_input, update_network_params=True, update_pde_params=True):
        """Return the list of active loss terms.

        Args:
            x, t: Input coordinates; both must require grad when the PDE term
                is requested, because derivatives are taken with respect to them.
            y_input: Target values compared against the network output.
            update_network_params: Include the data-fit MSE term.
            update_pde_params: Include the PDE-residual MSE term.

        Returns:
            A list holding the data MSE (if requested) followed by the PDE
            residual MSE (if requested).
        """
        total_loss = []
        uf = self.forward(x, t)
        if update_network_params:
            total_loss.append(self.mean_squared(uf - y_input))

        if update_pde_params:
            # Undo the log-parameterisation for the flagged coefficients.
            coefficients = [
                torch.exp(self.pde_parameters[idx])
                if idx in self.exp_indices
                else self.pde_parameters[idx]
                for idx in range(self.pde_parameters.shape[0])
            ]

            # PDE Loss calculation
            u_t = self.gradients(uf, t)[0]
            u_x = self.gradients(uf, x)[0]
            u_xx = self.gradients(u_x, x)[0]

            # All coefficients are forwarded, so the callable may take any
            # number of them ahead of (u, u_x, u_xx).
            residual = u_t - self.callable_loss_fn(*coefficients, uf, u_x, u_xx)
            total_loss.append(self.mean_squared(residual))

        return total_loss

    def get_gradients_dict(self, x, t):
        """Return candidate features and the time derivative.

        Switches the module to eval mode as a side effect.

        Returns:
            A tuple ``({'uf': u, 'u_x': u_x, 'u_xx': u_xx}, u_t)``.
        """
        self.eval()

        uf = self.forward(x, t)

        ### PDE Loss calculation ###
        # Only the derivatives that are returned get computed; other
        # second-order terms (u_tt, u_xt, u_tx) are not part of the feature set.
        u_t = self.gradients(uf, t)[0]
        u_x = self.gradients(uf, x)[0]
        u_xx = self.gradients(u_x, x)[0]

        return {'uf': uf, 'u_x': u_x, 'u_xx': u_xx}, u_t

    def mean_squared(self, inn_tensor):
        """Return the mean of the element-wise squares of ``inn_tensor``."""
        return (inn_tensor**2).mean()

    def gradients(self, func, x):
        """Differentiate ``func`` with respect to ``x``, keeping the graph.

        Returns the tuple produced by ``torch.autograd.grad``; ``create_graph``
        is set so that the result can itself be differentiated.
        """
        # ones_like keeps the seed on the same device and dtype as func.
        return grad(func, x, create_graph=True, retain_graph=True, grad_outputs=torch.ones_like(func))

In [23]:
pretrained = False
hidden_nodes = 50

# Fully connected tanh MLP: 2 inputs -> 4 hidden layers of `hidden_nodes` -> 1 output.
layers = [nn.Linear(2, hidden_nodes), nn.Tanh()]
for _ in range(3):
    layers.extend([nn.Linear(hidden_nodes, hidden_nodes), nn.Tanh()])
layers.append(nn.Linear(hidden_nodes, 1))
model = nn.Sequential(*layers)

network = Network(model=model, pde_parameters=pde_params, loss_fn=fn, exp_indices=exp_indices)
if pretrained:
    # strict=False tolerates keys that do not match between checkpoint and network.
    network.load_state_dict(
        torch.load("./saved_path_inverse_burger/nn_without_physical_reg.pth"),
        strict=False,
    )

In [24]:
# Convert the NumPy arrays to float32 leaf tensors that track gradients
# (the network differentiates its output with respect to the inputs).
X_u_train = torch.tensor(X_u_train, dtype=torch.float32, requires_grad=True)
u_train = torch.tensor(u_train, dtype=torch.float32, requires_grad=True)

X_star = torch.tensor(X_star, dtype=torch.float32, requires_grad=True)
u_star = torch.tensor(u_star, dtype=torch.float32, requires_grad=True)

In [25]:
# First-phase optimizer: Adam over every parameter exposed by network.parameters().
optimizer1 = torch.optim.Adam(network.parameters(), lr=5e-3)  # metaopt also has .parameters()
# Number of first-phase iterations.
epochs1 = 10000
# weights_path = './saved_path_inverse_burger/frozen_pinn.pth'
# weights_path = './saved_path_inverse_burger/nn_with_physical_reg_from_symreg.pth'

In [26]:
def closure():
    """Closure for ``optimizer2``: return the summed loss on the training set.

    Gradients are cleared and back-propagated only when autograd is active,
    so the closure can also be evaluated with gradients disabled.
    """
    if torch.is_grad_enabled():
        optimizer2.zero_grad()

    x_in = X_u_train[:, 0:1]
    t_in = X_u_train[:, 1:2]
    total = sum(network.loss(x_in, t_in, u_train, update_network_params=True, update_pde_params=True))

    if total.requires_grad:
        total.backward()
    return total

def mtl_closure():
    """Closure for ``optimizer1`` that installs combined per-objective gradients.

    Each of the two loss terms is back-propagated separately, the per-task
    gradient lists are handed to ``pcgrad.pc_grad_update``, and the sum of the
    two returned gradient lists is written into ``param.grad``.

    Returns:
        The sum of the loss terms.
    """
    n_obj = 2
    losses = network.loss(X_u_train[:, 0:1], X_u_train[:, 1:2], u_train, update_network_params=True, update_pde_params=True)
    updated_grads = []

    for i in range(n_obj):
        optimizer1.zero_grad()
        # The graph is shared by both objectives, so it must survive this pass.
        losses[i].backward(retain_graph=True)

        # Snapshot this objective's gradients; parameters it does not reach
        # contribute zeros on the parameter's own device and dtype.
        g_task = [
            param.grad.detach().clone() if param.grad is not None else torch.zeros_like(param)
            for param in network.parameters()
        ]
        updated_grads.append(g_task)

    # NOTE(review): presumably the first element holds the projected gradients
    # indexed as [task][parameter]; confirm against pcgrad.pc_grad_update.
    updated_grads = list(pcgrad.pc_grad_update(updated_grads))[0]
    for idx, param in enumerate(network.parameters()):
        # Assign a plain detached tensor; wrapping an existing tensor in
        # torch.tensor(...) copies it with a UserWarning on every step.
        param.grad = (updated_grads[0][idx] + updated_grads[1][idx]).detach()

    return sum(losses)

In [27]:
network.train()
best_train_loss = 1e6

# Phase 1: optimizer1 (Adam) driven by the PCGrad closure.
print('1st Phase optimization using Adam with PCGrad gradient modification')
for i in range(epochs1):
    optimizer1.step(mtl_closure)

    if (i % 1000) == 0:
        # Re-evaluate the post-step loss only when it is reported; doing it
        # every epoch doubles the forward/backward cost for no benefit.
        l = mtl_closure()
        print("Epoch {}: ".format(i), l.item())

# Phase 2: LBFGS refinement with the plain summed-loss closure.
optimizer2 = torch.optim.LBFGS(network.parameters(), lr=5e-2, max_iter=50, max_eval=50, history_size=100, line_search_fn='strong_wolfe')
epochs2 = 1000
print('2nd Phase optimization using LBFGS')
for i in range(epochs2):
    optimizer2.step(closure)

    if (i % 100) == 0:
        l = closure()
        print("Epoch {}: ".format(i), l.item())

1st Phase optimization
Epoch 0:  0.38079485297203064




Epoch 1000:  0.0069985222071409225
Epoch 2000:  0.003476529149338603
Epoch 3000:  0.0037351776845753193
Epoch 4000:  0.0012994545977562666
Epoch 5000:  0.0020747403614223003
Epoch 6000:  0.001198361860588193
Epoch 7000:  0.008038699626922607
Epoch 8000:  0.0017276732251048088
Epoch 9000:  0.0005618932191282511
2nd Phase optimization
Epoch 0:  0.00023802494979463518
Epoch 100:  4.41265410699998e-06
Epoch 200:  4.186397745797876e-06
Epoch 300:  4.186397745797876e-06
Epoch 400:  4.186397745797876e-06
Epoch 500:  4.186397745797876e-06
Epoch 600:  4.186397745797876e-06
Epoch 700:  4.186397745797876e-06
Epoch 800:  4.186397745797876e-06
Epoch 900:  4.186397745797876e-06


In [28]:
# Read the learned coefficients back as Python floats, then undo the
# log-parameterisation for the entries listed in exp_indices.
est_lambdas = network.pde_parameters.detach().tolist()
for idx in exp_indices:
    est_lambdas[idx] = np.exp(est_lambdas[idx])
est_lambdas

[0.0031869084928008618, 0.9995834231376648]

In [29]:
### Loading the best weights ###
# network.load_state_dict(torch.load(weights_path))

In [30]:
# Switch the network to evaluation mode (the notebook also echoes the module).
network.eval()

Network(
  (model): Sequential(
    (0): Linear(in_features=2, out_features=50, bias=True)
    (1): Tanh()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): Tanh()
    (4): Linear(in_features=50, out_features=50, bias=True)
    (5): Tanh()
    (6): Linear(in_features=50, out_features=50, bias=True)
    (7): Tanh()
    (8): Linear(in_features=50, out_features=1, bias=True)
  )
)

In [31]:
# Display the recovered PDE coefficients.
est_lambdas

[0.0031869084928008618, 0.9995834231376648]

In [33]:
### TODO: please double-check this code before the actual evaluation ###
# Reference coefficients: nu = 0.01 / pi and 1.
nu = 0.01 / np.pi
grounds = [nu, 1]

# Relative error of each recovered coefficient, in percent.
error_lambda_1, error_lambda_2 = [
    np.abs(estimate - truth) / truth * 100
    for estimate, truth in zip(est_lambdas[:2], grounds)
]

error_lambda_1, error_lambda_2 # (0.11968308646106521 error on nu, 0.04165768623352051 error on 1)

(0.11968308646106521, 0.04165768623352051)

### Symbolic regression

In [None]:
# Evaluate the network and its derivatives on the training points.
grads_dict, target = network.get_gradients_dict(X_u_train[:, 0:1], X_u_train[:, 1:2])
index2features = grads_dict.keys()
print(index2features)

# Stack the features column-wise (in dict order) and move everything to NumPy
# for the symbolic regressor.
feature_columns = torch.cat(tuple(grads_dict.values()), dim=1)
G = feature_columns.detach().numpy()
target = target.squeeze().detach().numpy()

In [None]:
# Search for an expression of the feature columns that reproduces the target,
# restricted to +, - and * (no unary operators).
equations = pysr(
    G,
    target,
    niterations=100,
    binary_operators=["plus", "sub", "mult"],
    unary_operators=[],
    batching=True,
)

In [None]:
# Show the best equation found by pysr.
print(best(equations))
# NOTE: this rebinds `fn`, which earlier held the lambdified PDE expression.
fn = best_callable(equations)

In [None]:
# Pull the individual feature tensors out of the gradients dictionary.
uf, u_x, u_xx = (grads_dict[name] for name in ('uf', 'u_x', 'u_xx'))

In [None]:
# Extracted equation (for further fine-tuning)
# u_t + 0.6860763*uf*u_x - 0.0020577204*u_xx