In [1]:
import os
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from torch.autograd import Variable, grad

import matplotlib.pyplot as plt
from mpl_toolkits import mplot3d
from mpl_toolkits.axes_grid1 import make_axes_locatable
import scipy
import scipy.io as io
from pyDOE import lhs

from sklearn.metrics import mean_squared_error, mean_absolute_error

from pysr import pysr, best, best_callable
from gplearn.genetic import SymbolicRegressor

from utils import *
from ladder import LadderNetwork

In [11]:
DATA_PATH = "/Users/pongpisit/Desktop/research/pinn/Solving-Differential-Equations-with-Neural-Networks/SymbolicMathematics/data/burgers_shock.mat"

In [12]:
data = io.loadmat(DATA_PATH)

t = data['t'].flatten()[:,None]
x = data['x'].flatten()[:,None]
Exact = np.real(data['usol']).T

X, T = np.meshgrid(x,t)

X_star = np.hstack((X.flatten()[:,None], T.flatten()[:,None]))
u_star = Exact.flatten()[:,None]              

# Doman bounds
lb = X_star.min(0)
ub = X_star.max(0)

N = 2000
print(f"Training with {N} samples")
idx = np.random.choice(X_star.shape[0], N, replace=False)
X_u_train = X_star[idx, :]
u_train = u_star[idx,:]

Training with 2000 samples


In [13]:
class Network(nn.Module):
    def __init__(self, model, lambda_1_init, lambda_2_init):
        super(Network, self).__init__()
        self.model = model
        self.model.apply(self.xavier_init)
        self.lambda_1 = torch.nn.Parameter(torch.tensor([lambda_1_init]))
        self.lambda_2 = torch.nn.Parameter(torch.tensor([lambda_2_init]))
        
    def xavier_init(self, m):
        if type(m) == nn.Linear:
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)
        
    def forward(self, x, t):
        return self.model(torch.cat([x, t], dim=1))
    
    def loss(self, x, t, y_input, is_pde_parameters_update=False, callable_fn=None):
        uf = self.forward(x, t)
        total_loss = F.mse_loss(uf, y_input, reduction='mean')
        
        if is_pde_parameters_update:
            lambda_1 = self.lambda_1
            lambda_2 = torch.exp(self.lambda_2)
            
            # PDE Loss calculation
            u_t = self.gradients(uf, t)[0]
            u_x = self.gradients(uf, x)[0]
            u_xx = self.gradients(u_x, x)[0]
            l_eq = (u_t + lambda_1*uf*u_x - lambda_2*u_xx)
            l_eq = (l_eq**2).mean()
            
            total_loss = total_loss + l_eq
        
        return total_loss
    
    def get_theta(self, x, t):
        self.eval()
        
        uf = self.forward(x, t)
        
        # PDE Loss calculation
        u_t = self.gradients(uf, t)[0]
        u_x = self.gradients(uf, x)[0]
        u_xx = self.gradients(u_x, x)[0]
        
        X = torch.cat([-uf*u_x, u_xx], dim=1)
        y = u_t
        
        theta = (torch.inverse(X.T@X))@(X.T@y)
        
        theta_1 = np.maximum(theta[:, 0][0].detach().item(), torch.finfo(torch.float32).eps)
        theta_2 = np.log(np.maximum(theta[:, 0][1].detach().item(), torch.finfo(torch.float32).eps))
        
        return theta_1, theta_2
    
    def get_gradients_dict(self, x, t):
        self.eval()
        
        uf = self.forward(x, t)
        
        ### PDE Loss calculation ###
        # first-order derivatives
        u_t = self.gradients(uf, t)[0]
        u_x = self.gradients(uf, x)[0]
        # Homo second-order derivatives
        u_tt = self.gradients(u_t,t)[0]
        u_xx = self.gradients(u_x, x)[0]
        # Hetero second-order derivatives
        u_xt = self.gradients(u_t, x)[0]
        u_tx = self.gradients(u_x, t)[0]
        
        return {'uf':uf, 'u_x':u_x, 'u_xx':u_xx, 'u_tt':u_tt, 'u_xt':u_xt, 'u_tx':u_tx}, u_t
    
    def gradients(self, func, x):
        return grad(func, x, create_graph=True, retain_graph=True, grad_outputs=torch.ones(func.shape))
    
    def set_lambdas(self, lambda_1_init, lambda_2_init):
        self.lambda_1 = torch.nn.Parameter(torch.tensor([lambda_1_init]))
        self.lambda_2 = torch.nn.Parameter(torch.tensor([lambda_2_init]))

In [14]:
hidden_nodes = 50

model = nn.Sequential(nn.Linear(2, hidden_nodes), 
                        nn.Tanh(), 
                        nn.Linear(hidden_nodes, hidden_nodes),
                        nn.Tanh(), 
                        nn.Linear(hidden_nodes, hidden_nodes),
                        nn.Tanh(), 
                        nn.Linear(hidden_nodes, hidden_nodes),
                        nn.Tanh(),
                        nn.Linear(hidden_nodes, 1))

# model = nn.Sequential(nn.Linear(2, hidden_nodes), 
#                         nn.Tanh(),  
#                         TanhNewtonImplicitLayer(hidden_nodes, max_iter=50),
#                         nn.Tanh(), 
#                         nn.Linear(hidden_nodes, 1))

# Doesn't matter, can be anything.
lambda_1_init = 0.0
lambda_2_init = 0.0

network = Network(model=model, lambda_1_init=lambda_1_init, lambda_2_init=lambda_2_init)

In [15]:
X_u_train = torch.tensor(X_u_train).float().requires_grad_(True)
u_train = torch.tensor(u_train).float().requires_grad_(True)

X_star = torch.tensor(X_star).float().requires_grad_(True)
u_star = torch.tensor(u_star).float().requires_grad_(True)

In [16]:
epochs = 600
learning_rate = 5e-2 # Report learning_rate = 5e-2
print("Training with learning rate:", learning_rate)

# optimizer = torch.optim.Adam(network.parameters(), lr=3e-4)  # metaopt also has .parameters()
optimizer = torch.optim.LBFGS(network.parameters(), lr=5e-3, max_iter=50, max_eval=50, line_search_fn='strong_wolfe')
# weights_path = './saved_path_inverse_burger/frozen_pinn.pth'
# weights_path = './saved_path_inverse_burger/nn_without_physical_reg.pth'

Training with learning rate: 0.05


In [17]:
network.train(); best_train_loss = 1e6
for i in range(epochs):
    ### Add the closure function to calculate the gradient. For LBFGS.
    def closure():
        if torch.is_grad_enabled():
            optimizer.zero_grad()
        l = network.loss(X_u_train[:, 0:1], X_u_train[:, 1:2], u_train, is_pde_parameters_update=False)
        if l.requires_grad:
            l.backward()
        return l

    optimizer.step(closure)
    
    # calculate the loss again for monitoring
    l = closure()
    
#     if i > 400 and float(l.item()) < best_train_loss:
#         torch.save(network.state_dict(), './saved_path_inverse_burger/nn_without_physical_reg.pth')
#         best_train_loss = float(l.item())

    if (i % 10) == 0:
        print("Epoch {}: ".format(i), l.item())

Epoch 0:  0.2412918657064438
Epoch 10:  0.0010341339511796832
Epoch 20:  0.0002771736471913755
Epoch 30:  0.00010327190102543682
Epoch 40:  6.218352791620418e-05
Epoch 50:  4.0249266021419317e-05
Epoch 60:  2.4834700525389053e-05
Epoch 70:  1.732042983348947e-05
Epoch 80:  1.1642016033874825e-05
Epoch 90:  8.07095602795016e-06
Epoch 100:  7.099173672031611e-06
Epoch 110:  6.3440857047680765e-06
Epoch 120:  6.275794021348702e-06
Epoch 130:  6.275794021348702e-06
Epoch 140:  6.275794021348702e-06
Epoch 150:  6.275794021348702e-06
Epoch 160:  6.275794021348702e-06
Epoch 170:  6.275794021348702e-06
Epoch 180:  6.275794021348702e-06
Epoch 190:  6.275794021348702e-06
Epoch 200:  6.275794021348702e-06
Epoch 210:  6.275794021348702e-06
Epoch 220:  6.275794021348702e-06
Epoch 230:  6.275794021348702e-06
Epoch 240:  6.275794021348702e-06
Epoch 250:  6.275794021348702e-06
Epoch 260:  6.275794021348702e-06
Epoch 270:  6.275794021348702e-06
Epoch 280:  6.275794021348702e-06
Epoch 290:  6.2757940213

### Evaluate the MSE loss comparing btw with & without the sparsity (Average the results from 5 evaluations?)
### The better one would benefit the Symbolic regression process to recover PDE relation.

In [18]:
# This is a somewhat large generalization gap.
evaluate_network_mse(network=network, X_star=X_star, u_star=u_star)

1.2121667168685235e-05

### Precise pde parameters recovery using the PINN technique

In [None]:
# lambda_1_init, lambda_2_init = network.get_theta(X_u_train[:, 0:1], X_u_train[:, 1:2])
# network.set_lambdas(lambda_1_init, lambda_2_init)

lambda_1_init = 0.6860763
lambda_2_init = np.log(0.0020577204)

### Choosing btw reset model weights or pretraining ###
network = Network(model=model, lambda_1_init=lambda_1_init, lambda_2_init=lambda_2_init)
optimizer = torch.optim.LBFGS(network.parameters(), lr=5e-2, max_iter=50, max_eval=50, line_search_fn='strong_wolfe')

network.train(); best_train_loss = 1e6
for i in range(epochs):
    ### Add the closure function to calculate the gradient. For LBFGS.
    def closure():
        if torch.is_grad_enabled():
            optimizer.zero_grad()
        l = network.loss(X_u_train[:, 0:1], X_u_train[:, 1:2], u_train, is_pde_parameters_update=True)
        if l.requires_grad:
            l.backward()
        return l

    optimizer.step(closure)

    # calculate the loss again for monitoring
    l = closure()

    if i > 400 and float(l.item()) < best_train_loss:
        torch.save(network.state_dict(), 'nn_with_physical_reg_from_symreg.pth')
        best_train_loss = float(l.item())

    if (i % 100) == 0:
        print("Epoch {}: ".format(i), l.item())

In [7]:
### Loading the best weights ###
network.load_state_dict(torch.load(weights_path))

<All keys matched successfully>

In [8]:
network.eval()

Network(
  (model): Sequential(
    (0): Linear(in_features=2, out_features=50, bias=True)
    (1): Tanh()
    (2): Linear(in_features=50, out_features=50, bias=True)
    (3): Tanh()
    (4): Linear(in_features=50, out_features=50, bias=True)
    (5): Tanh()
    (6): Linear(in_features=50, out_features=50, bias=True)
    (7): Tanh()
    (8): Linear(in_features=50, out_features=1, bias=True)
  )
)

In [9]:
nu = 0.01 / np.pi

error_lambda_1 = np.abs(network.lambda_1.detach().item() - 1.0)*100
error_lambda_2 = np.abs(torch.exp(network.lambda_2).detach().item() - nu) / nu * 100

error_lambda_1, error_lambda_2

(100.0, 31315.92653589793)

In [10]:
1.0, network.lambda_1.detach().item()

(1.0, 0.0)

In [11]:
nu, torch.exp(network.lambda_2).detach().item()

(0.003183098861837907, 1.0)

### Symbolic regression

In [12]:
# grads_dict, target = network.get_gradients_dict(X_u_train[:, 0:1], X_u_train[:, 1:2])
# index2features = grads_dict.keys()
# print(index2features)

# G = torch.cat(list(grads_dict.values()), dim=1).detach().numpy()
# target = torch.squeeze(target).detach().numpy()

dict_keys(['uf', 'u_x', 'u_xx', 'u_tt', 'u_xt', 'u_tx'])


In [11]:
# I already have the data
G = np.load("./saved_path_inverse_burger/data/derivatives-25600-V1-with-1000unlabledsamples.npy")
target = np.load("./saved_path_inverse_burger/data/dynamics-25600-V1-with-1000unlabledsamples.npy")
if len(target.shape) > 1:
    target = np.squeeze(target)

In [13]:
equations = pysr(G, target, niterations=20, binary_operators=["plus", "sub", "mult"], unary_operators=[], batching=True, procs=4, populations=10, npop=2000)

I will install Julia packages using PySR's Project.toml file. OK? (y/n):  y


OK. I will install at launch.
Running on julia -O3 /var/folders/z3/_stfms3523dd5mnfr3ch5n100000gp/T/tmphw4y73q1/runfile.jl
Activating environment on workers.
      From worker 5:	  Activating environment at `/usr/local/lib/python3.9/site-packages/Project.toml`
      From worker 4:	  Activating environment at `/usr/local/lib/python3.9/site-packages/Project.toml`
      From worker 3:	  Activating environment at `/usr/local/lib/python3.9/site-packages/Project.toml`
      From worker 2:	  Activating environment at `/usr/local/lib/python3.9/site-packages/Project.toml`
Importing installed module on workers...Finished!
Testing module on workers...Finished!
Testing entire pipeline on workers...Finished!
Started!

Cycles per second: 1.180e+03
Progress: 1 / 200 total iterations (0.500%)
Hall of Fame:
-----------------------------------------
Complexity  Loss       Score     Equation
1           6.776e-01  -0.000e+00  0.0011683702
3           6.111e-01  5.166e-02  (-0.41985202 * x0)
9           5

In [14]:
# Choose the one with best score => might be overfitting (the lowest loss)
print(best(equations))
# fn = best_callable(equations)

-0.9291712*x0*x1 + 0.003075424*x2


In [15]:
df = equations.drop(labels='lambda_format', axis=1)
df.to_pickle('./saved_path_inverse_burger/equations_from_pysr.pkl')

In [13]:
### The one config that I used, and it was giving a good approx symbolic representation of the data. ###

# (1)
# est_gp = SymbolicRegressor(population_size=50000, generations=20, function_set=('add', 'sub', 'mul'),
#                            p_crossover=0.7, p_subtree_mutation=0.1, p_hoist_mutation=0.05,
#                            p_point_mutation=0.1, max_samples=0.9, parsimony_coefficient=0.001,
#                            verbose=1, low_memory=True, n_jobs=2)

# (2)
# est_gp = SymbolicRegressor(population_size=60000, generations=20, function_set=('add', 'sub', 'mul'),
#                            p_crossover=0.7, p_subtree_mutation=0.1, p_hoist_mutation=0.05,
#                            p_point_mutation=0.1, max_samples=0.9, parsimony_coefficient=0.001,
#                            verbose=1, low_memory=True, n_jobs=-1)

# const_range=(-1. float(G.shape[1])) ?

### Current experiment ###
est_gp = SymbolicRegressor(population_size=60000, generations=25, function_set=('add', 'sub', 'mul'),
                           p_crossover=0.7, p_subtree_mutation=0.1, p_hoist_mutation=0.05,
                           p_point_mutation=0.1, max_samples=0.9, parsimony_coefficient=0.001,
                           verbose=1, low_memory=True, n_jobs=-1)

est_gp.fit(G, target)

    |   Population Average    |             Best Individual              |
---- ------------------------- ------------------------------------------ ----------
 Gen   Length          Fitness   Length          Fitness      OOB Fitness  Time Left
   0    28.22      2.09563e+57        7         0.314202         0.347012     26.05m
   1     7.43      4.10284e+46        7         0.302478         0.452525     34.27m
   2     4.43      5.18667e+27        5         0.290303         0.535045     42.47m
   3     4.85      1.97667e+30        7         0.289603           0.5684     40.97m
   4     6.19      2.94895e+25       13         0.131892         0.134996     36.53m
   5     7.24      6.53746e+22       15         0.125058         0.187611     34.19m
   6     6.80      1.70287e+35       15        0.0616337        0.0438162     31.96m
   7     6.77      3.32202e+26       15        0.0605004        0.0540161     31.20m
   8     8.89      2.09327e+28       15        0.0556881        0.0973268  



   9    11.77      3.18222e+26       15        0.0536423        0.0533186     31.35m
  10    12.87      2.93116e+23       15        0.0519578        0.0684789     27.37m
  11    14.51      5.48416e+21       19        0.0490354        0.0622942     24.73m
  12    15.02      3.50781e+24       37        0.0477976        0.0650997     23.75m
  13    15.79      5.72234e+27       33        0.0460954         0.055607     20.51m
  14    14.06      1.46036e+24       31        0.0467849        0.0494052     16.56m
  15    12.73      4.67856e+28       19        0.0470444         0.080213     12.56m
  16    11.82       2.0644e+24       27        0.0474535        0.0433376      9.23m
  17    11.26      1.47116e+20       11         0.048146         0.102665      6.04m
  18    11.00      2.02108e+23       11        0.0484568        0.0998683      3.28m
  19    10.92      1.10163e+48       11        0.0482482         0.101745      0.00s


SymbolicRegressor(function_set=('add', 'sub', 'mul'), max_samples=0.9, n_jobs=4,
                  p_crossover=0.7, p_hoist_mutation=0.05, p_point_mutation=0.1,
                  p_subtree_mutation=0.1, population_size=60000, verbose=1)

In [17]:
from utils import build_exp
program = est_gp._program
print(build_exp(program))

(-X0*X1 + 0.00329*X2, {X1, X0, X2})


In [16]:
# from utils import pickle_save
# pickle_save(est_gp, './data/gp_symreg_with_noisy_features.pkl')

Saved to ./data/gp_symreg_with_noisy_features2.pkl


In [None]:
# Exreacted equation (for further fine-tuning)
# u_t + 0.6860763*uf*u_x - 0.0020577204*u_xx