### Do I need to implement the GPU version for faster computation?

In [1]:
import numpy as np
import scipy.io as io
from torch.autograd import grad
from utils import *

In [2]:
DATA_PATH = "/Users/pongpisit/Desktop/research/pinn/Solving-Differential-Equations-with-Neural-Networks/SymbolicMathematics/data/burgers_shock.mat"
data = io.loadmat(DATA_PATH)

t = data['t'].flatten()[:,None]
x = data['x'].flatten()[:,None]
Exact = np.real(data['usol']).T

X, T = np.meshgrid(x,t)

X_star = np.hstack((X.flatten()[:,None], T.flatten()[:,None]))
u_star = Exact.flatten()[:,None]              

# Doman bounds
lb = X_star.min(0)
ub = X_star.max(0)

N = 2000
print(f"Training with {N} samples")
idx = np.random.choice(X_star.shape[0], N, replace=False)
X_u_train = X_star[idx, :]
u_train = u_star[idx,:]

# Convert to torch.tensor
X_u_train = torch.tensor(X_u_train).float().requires_grad_(True)
u_train = torch.tensor(u_train).float().requires_grad_(True)
X_star = torch.tensor(X_star).float().requires_grad_(True)
u_star = torch.tensor(u_star).float().requires_grad_(True)

Training with 2000 samples


In [3]:
class Network(nn.Module):
    def __init__(self, model):
        super(Network, self).__init__()
        self.model = model
        self.model.apply(self.xavier_init)
        # For tracking
        self.index2features = ('uf', 'u_x',  'u_xx', 'u_tt', 'u_xt', 'u_tx')
        self.uf = None
        
    def xavier_init(self, m):
        if type(m) == nn.Linear:
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.01)
        
    def forward(self, x, t):
        self.uf = self.model(torch.cat([x, t], dim=1))
        return self.uf
    
    def get_selector_data(self, x, t):
        uf = self.forward(x, t)
        
        ### PDE Loss calculation ###
        # first-order derivatives
        u_t = self.gradients(uf, t)[0]
        u_x = self.gradients(uf, x)[0]
        # Homo second-order derivatives
        u_tt = self.gradients(u_t,t)[0]
        u_xx = self.gradients(u_x, x)[0]
        # Hetero second-order derivatives
        u_xt = self.gradients(u_t, x)[0]
        u_tx = self.gradients(u_x, t)[0]
        
        X_selector = torch.cat([uf, u_x, u_xx, u_tt, u_xt, u_tx], dim=1)
        y_selector = u_t
        
        return X_selector, y_selector
    
    def gradients(self, func, x):
        return grad(func, x, create_graph=True, retain_graph=True, grad_outputs=torch.ones(func.shape))

In [4]:
class SeclectorNetwork(nn.Module):
    def __init__(self, X_train_dim, th, gamma=1e-5):
        super().__init__()
        # Selector model
        layers = [nn.Linear(X_train_dim, 50), nn.Tanh(), 
                            nn.Linear(50, 50), nn.Tanh(), 
                            nn.Linear(50, X_train_dim), ThresholdSoftmax(th=th)]
        self.selector_model = nn.Sequential(*layers)
        
        # Nonlinear model
        layers = [nn.Linear(3, 100), nn.Tanh(), nn.Linear(100, 1)] # 50 or 100
        self.nonlinear_model = nn.Sequential(*layers)
        # Mask and gamma parameter, I may need to tune the gamma param!.
        self.mask = None
        self.gamma = gamma
    
    # You should either call forward or loss once
    def forward(self, inn):
        self.mask = self.selector_model(inn)
        ut_approx = self.nonlinear_model(inn[:, self.mask])
        return ut_approx
    
    # You should either call forward or loss once
    def loss(self, X_input, y_input, include_mask_loss=False):
        ut_approx = self.forward(X_input)
        mse_loss = F.mse_loss(ut_approx, y_input, reduction='mean')
        if include_mask_loss:
            # TODO: difference btw those passing and those not
            mse_loss -= self.selector_model[-1].prob[self.mask].sum() * self.gamma
        return mse_loss

In [5]:
network = Network(model=simple_solver_model(50))
selector = SeclectorNetwork(X_train_dim=6, th=0.5)

# optimizer = torch.optim.LBFGS(list(network.parameters()) + list(selector.parameters()), 
#                               lr=5e-2, max_iter=80, max_eval=100, 
#                               history_size=120, line_search_fn='strong_wolfe')

optimizer = torch.optim.Adam(list(network.parameters()) + list(selector.parameters()), lr=1e-3)
epochs = 1200; testing = False

if testing:
    # unsupervised_loss
    unsup_loss = selector.loss(*network.get_selector_data(*dimension_slicing(X_u_train)))
    sup_loss = F.mse_loss(network.uf, u_train)

    # No MTL yet, apply the naive summation first to see if it's working?
    total_loss = unsup_loss + sup_loss
    print(total_loss)

    total_loss.backward()

In [6]:
network.train(); selector.train(); best_train_loss = 1e6

for i in range(epochs):
    ### Add the closure function to calculate the gradient. For LBFGS.
    def closure():
        if torch.is_grad_enabled():
            optimizer.zero_grad()
        
        # Total loss calculation process
        # unsupervised_loss
        unsup_loss = selector.loss(*network.get_selector_data(*dimension_slicing(X_u_train)), include_mask_loss=True)
        sup_loss = F.mse_loss(network.uf, u_train)

        # No MTL yet, apply the naive summation first to see if it's working?
        total_loss = unsup_loss + sup_loss
        
        if total_loss.requires_grad:
            total_loss.backward()
        
        return total_loss

    optimizer.step(closure)
    
    # calculate the loss again for monitoring
    l = closure()

    if (i % 100) == 0:
        print("Epoch {}: ".format(i), l.item())
        print(selector.mask)

Epoch 0:  0.30441462993621826
tensor([0, 1, 2])
Epoch 100:  0.04821036010980606
tensor([0, 1, 2])
Epoch 200:  0.03587862104177475
tensor([0, 1, 2])
Epoch 300:  0.028232170268893242
tensor([0, 1, 2])
Epoch 400:  0.022483276203274727
tensor([0, 1, 2])
Epoch 500:  0.018681025132536888
tensor([0, 1, 2])
Epoch 600:  0.01666880026459694
tensor([0, 1, 2])
Epoch 700:  0.014937910251319408
tensor([0, 1, 2])
Epoch 800:  0.013383101671934128
tensor([0, 1, 2])
Epoch 900:  0.01193489320576191
tensor([0, 1, 2])
Epoch 1000:  0.011945461854338646
tensor([0, 1, 2])
Epoch 1100:  0.01184071134775877
tensor([0, 1, 2])


In [7]:
network.eval()
F.mse_loss(network(*dimension_slicing(X_star)).detach(), u_star)

tensor(0.0077, grad_fn=<MseLossBackward>)

In [9]:
selector.mask

tensor([0, 1, 2])

In [10]:
('uf', 'u_x',  'u_xx', 'u_tt', 'u_xt', 'u_tx')

('uf', 'u_x', 'u_xx', 'u_tt', 'u_xt', 'u_tx')

In [11]:
selector.selector_model[-1].prob

tensor([1.0304e-01, 3.6693e-01, 5.2988e-01, 3.9262e-05, 5.9319e-05, 4.8331e-05],
       grad_fn=<MeanBackward1>)

In [12]:
torch.argsort(selector.selector_model[-1].prob, descending=True)

tensor([2, 1, 0, 4, 5, 3])