In [1]:
%load_ext autoreload
%autoreload 2 
%reload_ext autoreload
%matplotlib inline
import matplotlib.pyplot as plt

# always import gbm_algos first !
import xgboost, lightgbm, catboost
from gplearn.genetic import SymbolicRegressor

# To access the contents of the parent dir
import sys; sys.path.insert(0, '../')
import os
from scipy.io import loadmat
from utils import *
from preprocess import *

# Let's do facy optimizers
from optimizers import Lookahead, AdamGC, SGDGC
from madgrad import MADGRAD
from lbfgsnew import LBFGSNew
# Modify at /usr/local/lib/python3.9/site-packages/torch_lr_finder/lr_finder.py
from torch_lr_finder import LRFinder

from tqdm import trange

In [2]:
include_N_res = True

DATA_PATH = '../PDE_FIND_experimental_datasets/kuramoto_sivishinky.mat'
X, T, Exact = space_time_grid(data_path=DATA_PATH, real_solution=True)
X_star, u_star = get_trainable_data(X, T, Exact)

# Doman bounds
lb = X_star.min(axis=0)
ub = X_star.max(axis=0)

N = 5000
print(f"Training with {N} samples")
idx = np.random.choice(X_star.shape[0], N, replace=False)
X_u_train = X_star[idx, :]
u_train = u_star[idx,:]

# Unsup data
if include_N_res:
    N_res = N//2
    idx_res = np.array(range(X_star.shape[0]-1))[~idx]
    idx_res = np.random.choice(idx_res.shape[0], N_res, replace=True)
    X_res = X_star[idx_res, :]
    print(f"Training with {N_res} unsup samples")
    X_u_train = np.vstack([X_u_train, X_res])
    u_train = np.vstack([u_train, torch.rand(X_res.shape[0], 1) - 1000])
    # del X_res
else: print("Not including N_res")
    
# Convert to torch.tensor
X_u_train = to_tensor(X_u_train, True)
u_train = to_tensor(u_train, False)
X_star = to_tensor(X_star, True)
u_star = to_tensor(u_star, False)

# lb and ub are used in adversarial training
scaling_factor = 1.0
lb = scaling_factor*to_tensor(lb, False)
ub = scaling_factor*to_tensor(ub, False)

# Feature names
feature_names=('uf', 'u_x', 'u_xx', 'u_xxx', 'u_xxxx')

Training with 5000 samples
Training with 2500 unsup samples


In [3]:
# %pylab inline
# pylab.rcParams['figure.figsize'] = (12, 8)
# import numpy as np
# from mpl_toolkits.mplot3d import Axes3D
# import scipy.io as sio

# fig1 = figure()
# ax = fig1.gca(projection='3d')
# surf = ax.plot_surface(X, T, Exact, rstride=1, cstride=1, cmap=cm.coolwarm,
#     linewidth=0, antialiased=False)
# title('Kuramoto Sivashinsky Equation', fontsize = 20)
# xlabel('x', fontsize = 16)
# ylabel('t', fontsize = 16)

In [4]:
class Network(nn.Module):
    def __init__(self, model, index2features=None, scale=False, lb=None, ub=None):
        super(Network, self).__init__()
        # pls init the self.model before
        self.model = model
        # For tracking, the default tup is for the burgers' equation.
        self.index2features = index2features
        print("Considering", self.index2features)
        self.diff_flag = diff_flag(self.index2features)
        self.uf = None
        self.scale = scale
        self.lb, self.ub = lb, ub
        
    def xavier_init(self, m):
        if type(m) == nn.Linear:
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)

    def forward(self, x, t):
        if not self.scale: self.uf = self.model(torch.cat([x, t], dim=1))
        else: self.uf = self.model(self.neural_net_scale(torch.cat([x, t], dim=1)))
        return self.uf
    
    def get_selector_data(self, x, t):
        uf = self.forward(x, t)
        u_t = self.gradients(uf, t)[0]
        
        ### PDE Loss calculation ###
        # Without calling grad
        derivatives = []
        for t in self.diff_flag[0]:
            if t=='uf': derivatives.append(uf)
            elif t=='x': derivatives.append(x)
        # With calling grad
        for t in self.diff_flag[1]:
            out = uf
            for c in t:
                if c=='x': out = self.gradients(out, x)[0]
                elif c=='t': out = self.gradients(out, t)[0]
            derivatives.append(out)
        
        return torch.cat(derivatives, dim=1), u_t
    
    def gradients(self, func, x):
        return grad(func, x, create_graph=True, retain_graph=True, grad_outputs=torch.ones(func.shape))
    
    def neural_net_scale(self, inp): 
        return 2*(inp-self.lb/(self.ub-self.lb))-1

In [5]:
m = TorchMLP(dimensions=[2, 50, 50, 50 ,50, 50, 1], activation_function=nn.Tanh, bn=nn.LayerNorm, dropout=None)
optimizer = MADGRAD(m.parameters(), lr=1e-3)
# optimizer = LBFGSNew(m.parameters(), 
#                      lr=1e-2, max_iter=300, 
#                      max_eval=int(300*1.25), history_size=150, 
#                      line_search_fn=True, batch_mode=False)

Using old implementation of TorchMLP. See models.py for more new model-related source code.


In [6]:
X_train = -1 + 2*((X_u_train-lb)/(ub-lb)).detach().requires_grad_(False)
for i in range(50000):
    def pretraining_closure():
        global N, X_u_train, u_train
        if torch.is_grad_enabled():
            optimizer.zero_grad()
        # Only focusing on first [:N, :] elements
        mse_loss = F.mse_loss(m(X_train[:N, :]), u_train[:N, :])
        if mse_loss.requires_grad:
            mse_loss.backward(retain_graph=False)
        return mse_loss
    
    optimizer.step(pretraining_closure)
    mse_loss = pretraining_closure()
    
    if (i%100)==0:
        print(mse_loss.item())

1.7847830057144165
0.9558131694793701
0.8873608112335205
0.7654500603675842
0.8421025276184082
0.6458638310432434
0.6336608529090881
0.5828167796134949
0.5426675081253052
0.4857039153575897
0.45902562141418457
0.3698643445968628
0.35103023052215576
0.360348105430603
0.3340137302875519
0.26383572816848755


KeyboardInterrupt: 

In [28]:
X_test = ((X_star-lb)/(ub-lb)).detach().requires_grad_(False)

In [29]:
F.mse_loss(m(X_test), u_star).item()

0.034587275236845016

In [30]:
u_star

tensor([[ 1.0061],
        [ 1.0122],
        [ 1.0182],
        ...,
        [-0.7986],
        [-0.8945],
        [-0.9826]])

In [31]:
((m(X_test)-u_star)**2).mean()

tensor(0.0346, grad_fn=<MeanBackward0>)

In [32]:
m(X_test)

tensor([[0.9557],
        [0.9616],
        [0.9676],
        ...,
        [0.6230],
        [0.5906],
        [0.5578]], grad_fn=<AddmmBackward>)

In [33]:
F.mse_loss(m(X_train[:N, :]), u_train[:N, :])

tensor(0.0187, grad_fn=<MseLossBackward>)