In [None]:
import numpy as np
import scipy
import matplotlib.pyplot as plt
import torch
from scipy.integrate import solve_ivp
import torch.nn as nn
import numpy as np
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.nn.functional import mse_loss


In [None]:
class DGM_Layer(nn.Module):
    """Gated recurrent-style layer that updates a hidden state ``S`` from an input ``x``.

    Four sub-networks (``gate_Z``, ``gate_G``, ``gate_R``, ``gate_H``), each a
    ``Linear(dim_x + dim_S, dim_S)`` followed by the chosen activation, are
    combined as ``(1 - G) * H + Z * S``.  This looks like the layer used in
    Deep Galerkin Method networks -- confirm against the intended reference.

    Args:
        dim_x: number of features of the input ``x``.
        dim_S: number of features of the hidden state ``S`` (also the output width).
        activation: one of ``'ReLU'``, ``'Tanh'``, ``'Sigmoid'``, ``'LogSigmoid'``.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """

    def __init__(self, dim_x, dim_S, activation='Tanh'):
        super(DGM_Layer, self).__init__()

        # Supported activation names, checked in order, paired with their module class.
        supported = (
            ('ReLU', nn.ReLU),
            ('Tanh', nn.Tanh),
            ('Sigmoid', nn.Sigmoid),
            ('LogSigmoid', nn.LogSigmoid),
        )
        for label, module_cls in supported:
            if activation == label:
                self.activation = module_cls()
                break
        else:
            raise ValueError("Unknown activation function {}".format(activation))

        # Every gate reads the concatenation of x and (a function of) S.
        # The gates are built in Z, G, R, H order so weights are initialised in that sequence.
        width_in = dim_x + dim_S
        self.gate_Z = self.layer(width_in, dim_S)
        self.gate_G = self.layer(width_in, dim_S)
        self.gate_R = self.layer(width_in, dim_S)
        self.gate_H = self.layer(width_in, dim_S)

    def layer(self, nIn, nOut):
        """Return ``Linear(nIn, nOut)`` followed by this layer's (shared) activation module."""
        return nn.Sequential(nn.Linear(nIn, nOut), self.activation)

    def forward(self, x, S):
        """Compute the updated state from ``x`` and ``S`` (both concatenated along dim 1)."""
        joined = torch.cat((x, S), dim=1)
        Z = self.gate_Z(joined)
        G = self.gate_G(joined)
        R = self.gate_R(joined)

        # The candidate state sees S only after it has been modulated by R.
        H = self.gate_H(torch.cat((x, S * R), dim=1))

        return (1 - G) * H + Z * S
class Net_DGM(nn.Module):
    """Scalar-valued network of (t, x): an input layer, three ``DGM_Layer`` blocks, a linear head.

    Args:
        dim_x: number of spatial features; one extra input feature is added for time.
        dim_S: hidden-state width shared by all layers.
        activation: one of ``'ReLU'``, ``'Tanh'``, ``'Sigmoid'``, ``'LogSigmoid'``.

    Raises:
        ValueError: if ``activation`` is not one of the supported names.
    """

    def __init__(self, dim_x, dim_S, activation='Tanh'):
        super(Net_DGM, self).__init__()

        self.dim = dim_x

        # Supported activation names, checked in order, paired with their module class.
        supported = (
            ('ReLU', nn.ReLU),
            ('Tanh', nn.Tanh),
            ('Sigmoid', nn.Sigmoid),
            ('LogSigmoid', nn.LogSigmoid),
        )
        for label, module_cls in supported:
            if activation == label:
                self.activation = module_cls()
                break
        else:
            raise ValueError("Unknown activation function {}".format(activation))

        # Time is concatenated in front of x, hence the "+ 1" on every input width.
        dim_tx = dim_x + 1
        self.input_layer = nn.Sequential(nn.Linear(dim_tx, dim_S), self.activation)

        self.DGM1 = DGM_Layer(dim_x=dim_tx, dim_S=dim_S, activation=activation)
        self.DGM2 = DGM_Layer(dim_x=dim_tx, dim_S=dim_S, activation=activation)
        self.DGM3 = DGM_Layer(dim_x=dim_tx, dim_S=dim_S, activation=activation)

        self.output_layer = nn.Linear(dim_S, 1)

    def forward(self, t, x):
        """Evaluate the network; ``t`` and ``x`` are concatenated along dim 1, output has one column."""
        tx = torch.cat((t, x), dim=1)
        state = self.input_layer(tx)
        # Each gated block receives the raw (t, x) input again together with the running state.
        for block in (self.DGM1, self.DGM2, self.DGM3):
            state = block(tx, state)
        return self.output_layer(state)

In [None]:
def _per_sample_hessian(grad_u_x, x_samples):
    """Return the (N, d, d) Hessian of u w.r.t. x for every sample, keeping the autograd graph.

    Row ``k`` of each Hessian is obtained by differentiating the batch sum of the
    ``k``-th gradient component once more.  This equals the per-sample Hessian as
    long as the model treats batch rows independently (no cross-sample coupling
    such as batch normalisation in training mode).
    """
    hessian_rows = []
    for k in range(x_samples.size(1)):
        component = grad_u_x[:, k].sum()
        row = None
        if component.requires_grad:
            row = torch.autograd.grad(
                component, x_samples, create_graph=True, allow_unused=True
            )[0]
        if row is None:
            # The gradient component does not depend on x: the second derivative is zero.
            row = torch.zeros_like(x_samples)
        hessian_rows.append(row)
    return torch.stack(hessian_rows, dim=1)


def compute_total_loss_v(model_u, t_samples, x_samples, sigma, H, M, C, D, R, model_a):
    """
    Compute the total loss for the PDE problem: mean squared PDE residual plus
    mean squared terminal-condition mismatch.

    The residual evaluated at every sample is

        u_t + 0.5 * tr(sigma sigma^T u_xx) + grad_x(u).(H x) + grad_x(u).(M alpha)
            + x^T C x + alpha^T D alpha

    with ``alpha = model_a(cat(t, x))``, and the terminal condition enforced at
    time 1 is ``u(1, x) = x^T R x``.

    Args:
        model_u: callable ``(t, x) -> (N, 1)`` value approximation.
        t_samples: (N, 1) time samples.  Marked ``requires_grad`` in place.
        x_samples: (N, d) state samples.  Marked ``requires_grad`` in place.
        sigma: diffusion matrix; ``sigma @ sigma.T`` must be (d, d).
        H, M, C, D, R: coefficient matrices of the drift, control, running
            state cost, running control cost and terminal cost respectively.
        model_a: callable taking ``cat(t, x)`` and returning the control, one row per sample.

    Returns:
        Scalar tensor ``loss_eqn + loss_boundary``, differentiable w.r.t. the
        parameters of ``model_u`` through every term, including the
        second-derivative (diffusion) term.
    """
    # Ensure input requires gradients
    t_samples.requires_grad_(True)
    x_samples.requires_grad_(True)

    # Model prediction and control under the current policy
    u = model_u(t_samples, x_samples)
    alpha = model_a(torch.cat((t_samples, x_samples), dim=1))

    # First derivatives in time and space from one backward pass
    grad_u_t, grad_u_x = torch.autograd.grad(
        u.sum(), (t_samples, x_samples), create_graph=True
    )

    # Second derivatives, kept in the graph so the diffusion term contributes
    # to the parameter gradients (a detached Hessian would act as a constant).
    grad_u_xx = _per_sample_hessian(grad_u_x, x_samples)

    # Diffusion term: 0.5 * tr(sigma sigma^T u_xx) for every sample
    sigma_sigma_T = sigma @ sigma.T
    trace = torch.einsum('jk,nkj->n', sigma_sigma_T, grad_u_xx).unsqueeze(-1)
    diffusive_term = 0.5 * trace

    # Compute convective term
    convective_term = (
        torch.sum(grad_u_x * (H @ x_samples.T).T, dim=1, keepdim=True)
        + torch.sum(grad_u_x * (M @ alpha.T).T, dim=1, keepdim=True)
    )

    # Compute quadratic term
    quadratic_term = torch.sum(x_samples * (C @ x_samples.T).T, dim=1, keepdim=True)

    # Compute constant term alpha^T D alpha
    constant_term = torch.einsum('ij,ij->i', alpha @ D, alpha).unsqueeze(-1)

    # Compute PDE residual
    pde_residual = grad_u_t + diffusive_term + convective_term + quadratic_term + constant_term

    # Loss for the equation part
    loss_eqn = pde_residual.pow(2).mean()

    # Loss for the boundary condition part (terminal time assumed to be T=1).
    # ones_like keeps the device and dtype of t_samples and carries no gradient.
    terminal_t = torch.ones_like(t_samples)
    u_T = model_u(terminal_t, x_samples)
    terminal_target = torch.sum(x_samples * (R @ x_samples.T).T, dim=1, keepdim=True)
    loss_boundary = (u_T - terminal_target).pow(2).mean()

    # Total loss
    return loss_eqn + loss_boundary


In [None]:
# Hyperparameters of the value network
dim_x = 2            # spatial dimension only; Net_DGM adds the time input itself
dim_S = 100          # width of the hidden state in every layer
activation = 'Tanh'  # activation name passed through to Net_DGM

# Instantiate the Net_DGM value-function approximator
v = Net_DGM(dim_x, dim_S, activation)


In [None]:
T = 1.0  # Upper bound of the time interval
N = 2000  # Number of sampling points
X_LOW, X_HIGH = -10.0, 10.0  # Bounds of the square spatial sampling domain
SAMPLE_SEED = 0  # Fixed seed so the sampled points are identical on every run

# A dedicated generator makes the samples reproducible without altering the global RNG state
sample_rng = torch.Generator().manual_seed(SAMPLE_SEED)

# Randomly generate time points t ∈ [0, T)
t_samples = torch.rand(N, 1, generator=sample_rng) * T

# Randomly generate spatial points x ∈ [-10, 10) x [-10, 10)
x_samples = torch.rand(N, 2, generator=sample_rng) * (X_HIGH - X_LOW) + X_LOW

In [None]:
# Problem coefficients (all float32, none of them trainable).

# Diffusion matrix (2 x 1): the same small value in both rows
sigma = torch.full((2, 1), 0.001, dtype=torch.float32, requires_grad=False)

# Matrix multiplying the state x in the convective term: 0.1 * I
H = torch.diag(torch.full((2,), 0.1, dtype=torch.float32))

# Matrix multiplying the control alpha in the convective term: identity
M = torch.eye(2, dtype=torch.float32)

# Weight of the quadratic state term x^T C x: zero
C = torch.zeros(2, 2, dtype=torch.float32)

# Weight of the quadratic control term alpha^T D alpha: identity
D = torch.eye(2, dtype=torch.float32)

# Weight of the terminal condition x^T R x: 10 * I
R = torch.diag(torch.full((2,), 10.0, dtype=torch.float32))

# Constant vector of ones.
# NOTE(review): the name `a` is later rebound to the policy network -- confirm this tensor is still needed.
a = torch.ones(2, dtype=torch.float32)

In [None]:
def compute_total_loss_a(model_u, t_samples, x_samples, sigma, H, M, C, D, R, model_a):
    """
    Objective for the control network: batch mean of

        grad_x(u).(H x) + grad_x(u).(M alpha) + x^T C x + alpha^T D alpha

    where ``alpha = model_a(cat(t, x))``.  Unlike the value loss this is the
    plain (signed) mean, not a squared residual, and it contains no time
    derivative, diffusion or terminal term.  ``sigma`` and ``R`` are accepted
    but never read by this function.

    ``t_samples`` and ``x_samples`` are marked ``requires_grad`` in place.

    Returns:
        Scalar tensor with the mean of the expression above.
    """
    # Ensure input requires gradients
    for samples in (t_samples, x_samples):
        samples.requires_grad_(True)

    def rowwise_form(left, matrix, right):
        # Per-sample product left_n . (matrix @ right_n), returned as an (N, 1) column.
        return torch.sum(left * (matrix @ right.T).T, dim=1, keepdim=True)

    # Value prediction and control at the sample points
    value = model_u(t_samples, x_samples)
    control = model_a(torch.cat((t_samples, x_samples), dim=1))

    # Spatial gradient of the value function
    value_grad_x = torch.autograd.grad(
        value.sum(), x_samples, create_graph=True, retain_graph=True
    )[0]

    # Drift contributions from the state and from the control
    drift_part = rowwise_form(value_grad_x, H, x_samples) + rowwise_form(value_grad_x, M, control)

    # Running costs: x^T C x and alpha^T D alpha
    state_cost = rowwise_form(x_samples, C, x_samples)
    control_cost = torch.einsum('ij,ij->i', control @ D, control).unsqueeze(-1)

    hamiltonian = drift_part + state_cost + control_cost

    return hamiltonian.mean()


In [None]:
class FFN(nn.Module):
    """Plain feed-forward network built from a list of layer widths.

    Args:
        sizes: layer widths ``[in, hidden..., out]``; one ``nn.Linear`` is
            created per consecutive pair.
        activation: module class instantiated after every Linear except the last.
        output_activation: module class instantiated after the last Linear.
        batch_norm: if true, a ``BatchNorm1d`` is placed on the input and after
            every Linear (including the last one, before the output activation).
    """

    def __init__(self, sizes, activation=nn.ReLU, output_activation=nn.Identity, batch_norm=False):
        super().__init__()

        modules = []
        if batch_norm:
            modules.append(nn.BatchNorm1d(sizes[0]))

        n_linear = len(sizes) - 1
        for j in range(n_linear):
            width_out = sizes[j + 1]
            modules.append(nn.Linear(sizes[j], width_out))
            if batch_norm:
                modules.append(nn.BatchNorm1d(width_out, affine=True))
            # Only the final Linear is followed by the output activation.
            is_last = (j == n_linear - 1)
            modules.append(output_activation() if is_last else activation())

        self.net = nn.Sequential(*modules)

    def _set_trainable(self, flag):
        # Toggle requires_grad on every parameter of this module.
        for param in self.parameters():
            param.requires_grad_(flag)

    def freeze(self):
        """Disable gradient tracking for all parameters."""
        self._set_trainable(False)

    def unfreeze(self):
        """Re-enable gradient tracking for all parameters."""
        self._set_trainable(True)

    def forward(self, x):
        """Apply the layer stack to ``x``."""
        return self.net(x)

In [None]:
# --- Value network v -------------------------------------------------------
dim_x_v = 2            # spatial dimension; Net_DGM adds the time input itself
dim_S_v = 100          # hidden-state width
activation_v = 'Tanh'  # activation name passed to Net_DGM

v = Net_DGM(dim_x_v, dim_S_v, activation_v)
optimizer = optim.Adam(v.parameters(), lr=0.01)

# --- Control network a -----------------------------------------------------
dim_x_a = 3                   # input width: one time feature plus two state features
output_dim_a = 2              # control dimension
hidden_layers_a = [100, 100]  # one entry per hidden layer, e.g. [100, 100] = two layers of 100 units

# Fully connected network: input -> hidden layers -> control output (no output non-linearity)
a = FFN(
    [dim_x_a, *hidden_layers_a, output_dim_a],
    activation=nn.ReLU,
    output_activation=nn.Identity,
)
optimizer2 = optim.Adam(a.parameters(), lr=0.001)


In [None]:
# Reference value that the network output at (t, x) below is compared against
true_v = 12649.0303

# Single evaluation point, each with a leading batch dimension of 1.
# NOTE(review): x = (100, 50) lies outside the [-10, 10) box the training samples are drawn from -- confirm this is intended.
t = torch.tensor([[0.]], dtype=torch.float32)
x = torch.tensor([[100.0, 50.0]], dtype=torch.float32)

In [None]:
m = 30            # Number of outer iterations (alternating v / a updates)
num_epochs_v = 5  # Gradient steps on v per outer iteration
num_epochs_a = 5  # Gradient steps on a per outer iteration
e_all = []        # |true_v - v(t, x)| after each outer iteration
v_all = []        # v(t, x) after each outer iteration

for i in range(m):
    # Step 1: fit the value network v with the control network a held fixed.
    # NOTE(review): the optimizer is rebuilt here, so Adam's moment estimates restart
    # on every outer iteration -- confirm this reset is intended.
    optimizer_v = torch.optim.Adam(v.parameters(), lr=0.01)
    for epoch in range(num_epochs_v):
        optimizer_v.zero_grad()  # Clear gradients at the beginning of each epoch
        loss_v = compute_total_loss_v(v, t_samples, x_samples, sigma, H, M, C, D, R, a)  # Compute loss
        loss_v.backward()  # Backpropagation
        optimizer_v.step()  # Update parameters
        print(f"Epoch_v {epoch}, Loss: {loss_v.item()}")

    # Step 2: update the control network a against the current v.
    optimizer_a = torch.optim.Adam(a.parameters(), lr=0.001)
    for epoch in range(num_epochs_a):
        optimizer_a.zero_grad()  # Clear gradients
        loss_a = compute_total_loss_a(v, t_samples, x_samples, sigma, H, M, C, D, R, a)  # Compute loss
        loss_a.backward()  # Backpropagation
        optimizer_a.step()  # Update parameters
        print(f"Epoch_a {epoch}, Loss: {loss_a.item()}")

    # Evaluate once and without autograd, so the stored tensors do not keep a
    # computation graph alive for every outer iteration.
    with torch.no_grad():
        v_pred = v(t, x)
    e = abs(true_v - v_pred)
    e_all.append(e)
    v_all.append(v_pred)



In [None]:
# Turn the stored one-element error tensors into plain Python floats
e_values = [err.item() for err in e_all]

# Keep at most the first 30 entries for the plot
e_values_30 = e_values[:30]

# Error against the 1-based iteration index, both axes logarithmic
plt.loglog(range(1, 1 + len(e_values_30)), e_values_30)
plt.xlabel('Epoch')
plt.ylabel('Error')
plt.title('Log-Log Plot of Error')
plt.grid(True)
plt.show()