In [1]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.distributions as D
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import LambdaLR, ReduceLROnPlateau, StepLR
from torch.utils.data import Dataset, DataLoader
# Set computation device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# The model draws reparameterized samples with torch.randn_like and its weights
# are randomly initialized, so seed PyTorch once up front to make
# "Restart Kernel -> Run All" reproducible.
SEED = 0
torch.manual_seed(SEED)

KeyboardInterrupt: 

In [None]:
class Input:
    """Builds the coordinate tensors for each constraint type of the PINN.

    Every ``get_*`` method returns a pair ``(X, T)`` of 1-D float32 tensors of
    equal length, placed on the target device, with ``requires_grad`` enabled
    so derivatives with respect to the coordinates can be taken.

    Attributes:
        x (np.ndarray): Spatial grid (first and last entries are the boundaries).
        t (np.ndarray): Temporal grid (first entry is the initial time).
        device: Device the tensors are created on. ``None`` means "use the
            notebook-level ``device`` variable", which is the original behavior.

    Methods:
        get_ic: Initial condition coordinates (all x, T = 0)
        get_bc: Boundary condition coordinates (x[0] and x[-1], all t[1:])
        get_colloc: Interior collocation points for the PDE residual
    """

    def __init__(self, x, t, device=None):
        """Store the spatial and temporal grids.

        Args:
            x (array-like): Spatial coordinates.
            t (array-like): Temporal coordinates.
            device (torch.device | str | None): Optional explicit device;
                defaults to the notebook-level ``device``.
        """
        # np.asarray is a no-op for ndarrays and lets plain lists work too.
        self.x = np.asarray(x)
        self.t = np.asarray(t)
        self.device = device

    def _to_leaf(self, values):
        """Convert a NumPy array to a float32 leaf tensor that tracks gradients."""
        # The global is only looked up when no explicit device was given.
        target = device if self.device is None else self.device
        tensor = torch.from_numpy(values).float().to(target)
        return tensor.requires_grad_()

    def get_ic(self):
        """Generate initial condition coordinates (T = 0 for every x).

        Returns:
            X (torch.Tensor): Spatial coordinates with grad enabled
            T (torch.Tensor): All-zero temporal coordinates with grad enabled
        """
        X = self._to_leaf(self.x)
        T = torch.zeros_like(X).requires_grad_()
        return X, T

    def get_bc(self):
        """Generate boundary condition coordinates (x[0] and x[-1] for all t[1:]).

        Returns:
            X (torch.Tensor): x[0] repeated len(t)-1 times, then x[-1] likewise
            T (torch.Tensor): t[1:] twice, once per boundary
        """
        n_times = len(self.t) - 1
        # ndarray.repeat repeats element-wise: [x0, ..., x0, xL, ..., xL],
        # which lines up with the two concatenated copies of t[1:].
        X = self.x[[0, -1]].repeat(n_times)
        T = np.concatenate([self.t[1:], self.t[1:]])
        return self._to_leaf(X), self._to_leaf(T)

    def get_colloc(self):
        """Generate collocation points for the PDE residual.

        Returns:
            X (torch.Tensor): Flattened interior spatial coordinates x[1:-1]
            T (torch.Tensor): Flattened temporal coordinates t[1:]
        """
        x_colloc = self.x[1:-1]
        t_colloc = self.t[1:]
        X, T = np.meshgrid(x_colloc, t_colloc, indexing='ij')
        return self._to_leaf(X.ravel()), self._to_leaf(T.ravel())

In [None]:
class AlphaNet(nn.Module):
    """Probabilistic network predicting a Gaussian over the PDE solution.

    Architecture:
        - Fully connected hidden layers with SiLU activation
        - Two linear output heads: the mean, and a positive standard deviation
          obtained as ``softplus(.) + 1e-6``
        - Inputs are the coordinates and PDE parameters (X, T, g, k)

    Args:
        hidden_features (list): Number of neurons per hidden layer
        in_features (int): Width of the first layer's input (default=4).
            ``forward`` always stacks exactly four features (X, T, g, k), so
            any other value will fail at the first layer.

    Note:
        The second head keeps its historical name ``logsig_layer`` so that
        previously saved state dicts still load, but the value returned by
        ``forward`` is a standard deviation, not a logarithm.
    """

    def __init__(self, hidden_features, in_features=4):
        super().__init__()

        # Consecutive (in, out) widths: in_features -> h0 -> h1 -> ...
        widths = [in_features, *hidden_features]
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(n_in, n_out) for n_in, n_out in zip(widths[:-1], widths[1:])]
        )

        # Output heads for the distribution parameters
        self.mu_layer = nn.Linear(widths[-1], 1)      # mean
        self.logsig_layer = nn.Linear(widths[-1], 1)  # pre-softplus std (see class Note)

    def forward(self, X, T, g, k):
        """Predict the mean and standard deviation at each (X, T) point.

        Args:
            X (torch.Tensor): Spatial coordinates, any shape
            T (torch.Tensor): Temporal coordinates, same shape as X
            g: PDE parameter, broadcast against X
            k: PDE parameter, broadcast against X

        Returns:
            mu (torch.Tensor): Predicted mean, same shape as X
            sig (torch.Tensor): Predicted standard deviation (strictly
                positive), same shape as X
        """
        G = g * torch.ones_like(X)
        K = k * torch.ones_like(X)
        # Stack along a new trailing axis so nn.Linear sees the 4 features last;
        # for 1-D inputs this is the same as the former dim=1.
        hidden = torch.stack([X, T, G, K], dim=-1)
        for layer in self.hidden_layers:
            hidden = F.silu(layer(hidden))
        mu = self.mu_layer(hidden).squeeze(-1)
        # softplus keeps the scale positive; the epsilon guards against
        # underflow to exactly zero, which Normal(scale=...) rejects.
        sig = F.softplus(self.logsig_layer(hidden)).squeeze(-1) + 1e-6

        return mu, sig

In [None]:
def get_u(mu, sig):
    """Draw one reparameterized sample u = mu + sig * eps with eps ~ N(0, 1).

    The noise is sampled independently of ``mu`` and ``sig``, so the result
    stays differentiable with respect to both.

    Args:
        mu (torch.Tensor): Predicted mean values
        sig (torch.Tensor): Predicted standard deviation values

    Returns:
        u (torch.Tensor): Sampled solution values
    """
    noise = torch.randn_like(mu)  # standard normal, same shape/dtype/device as mu
    scaled_noise = sig * noise
    return mu + scaled_noise

def get_dudx(u, X, create_graph=True):
    """Differentiate ``u`` with respect to ``X`` via automatic differentiation.

    A tensor of ones is used as the upstream gradient, so the result is the
    gradient of ``u.sum()`` with respect to ``X``. The graph is always
    retained so that further derivatives of the same ``u`` can be taken.

    Args:
        u (torch.Tensor): Solution values
        X (torch.Tensor): Input coordinates (must be part of u's graph)
        create_graph (bool): Keep the derivative itself differentiable,
            which is required for higher-order derivatives

    Returns:
        grad (torch.Tensor): du/dX values, same shape as X
    """
    upstream = torch.ones_like(u)
    (grad,) = torch.autograd.grad(
        outputs=u,
        inputs=X,
        grad_outputs=upstream,
        retain_graph=True,
        create_graph=create_graph,
    )
    return grad

def get_residual(u, X, T, g, k):
    """Evaluate the PDE residual ∂u/∂t - (1/g) ∂/∂x( η(u) ∂u/∂x ).

    The nonlinear coefficient is η(u) = |(k²u + 1)/k|.

    Args:
        u (torch.Tensor): Predicted solution values
        X (torch.Tensor): Spatial coordinates
        T (torch.Tensor): Temporal coordinates
        g (float): PDE parameter
        k (float): PDE parameter

    Returns:
        residual (torch.Tensor): PDE residual values
    """
    du_dt = get_dudx(u, T)
    du_dx = get_dudx(u, X)
    diffusivity = torch.abs((u * k**2 + 1)/k)  # η(u)
    flux = diffusivity * du_dx                 # η(u) ∂u/∂x
    diffusion = get_dudx(flux, X)              # ∂/∂x of the flux
    return du_dt - (1/g) * diffusion

def _constraint_loss(model, X, T, g, k, target, constraint_std=1e-3):
    """Shared per-point loss for constraints of the form u(X, T) = target.

    Draws one reparameterized sample u from the model's Normal(mu, sig) and
    returns ``log q(u) - log N(0; u - target, constraint_std)``.
    """
    mu, sig = model(X, T, g, k)
    u = get_u(mu, sig)

    # Log-density of the sample under the model's own predictive distribution.
    log_q = D.Normal(mu, sig).log_prob(u)

    # Log-density of a zero mismatch under a narrow Gaussian centred on
    # (u - target); it is largest when the sample hits the target exactly.
    mismatch_dist = D.Normal(u - target, torch.full_like(u, constraint_std))
    log_fit = mismatch_dist.log_prob(torch.zeros_like(u))

    return log_q - log_fit


def loss_ic(model, X, T, g, k, ic, constraint_std=1e-3):
    """Initial condition loss, evaluated per point.

    Returns ``log q(u) - log N(0; u - ic, constraint_std)`` for one sample
    u ~ q = Normal(mu, sig) predicted by the model. Minimizing it pulls the
    samples toward ``ic`` (second term) while the first term, evaluated at a
    reparameterized sample, rewards a larger predicted ``sig``.

    Args:
        model (AlphaNet): Network returning (mu, sig) for (X, T, g, k)
        X (torch.Tensor): Spatial coordinates
        T (torch.Tensor): Temporal coordinates (zeros)
        g (float): PDE parameter
        k (float): PDE parameter
        ic (torch.Tensor): Initial condition values
        constraint_std (float): Standard deviation of the Gaussian that
            scores the mismatch; smaller values enforce the condition harder

    Returns:
        loss (torch.Tensor): Unreduced loss, one value per point
    """
    return _constraint_loss(model, X, T, g, k, ic, constraint_std)


def loss_bc(model, X, T, g, k, bc, constraint_std=1e-3):
    """Boundary condition loss, evaluated per point.

    Same objective as ``loss_ic`` with ``bc`` as the target:
    ``log q(u) - log N(0; u - bc, constraint_std)``.

    Args:
        model (AlphaNet): Network returning (mu, sig) for (X, T, g, k)
        X (torch.Tensor): Spatial coordinates (boundary points)
        T (torch.Tensor): Temporal coordinates
        g (float): PDE parameter
        k (float): PDE parameter
        bc (torch.Tensor): Boundary condition values
        constraint_std (float): Standard deviation of the Gaussian that
            scores the mismatch

    Returns:
        loss (torch.Tensor): Unreduced loss, one value per point
    """
    return _constraint_loss(model, X, T, g, k, bc, constraint_std)

def loss_colloc(model, X, T, g, k, residual_std=1e-3):
    """Physics-informed loss at collocation points, evaluated per point.

    Draws one reparameterized sample u from the model's Normal(mu, sig),
    evaluates the PDE residual of that sample, and returns
    ``log q(u) - log N(0; residual, residual_std)``.

    Args:
        model (AlphaNet): Network returning (mu, sig) for (X, T, g, k)
        X (torch.Tensor): Spatial collocation coordinates (grad enabled)
        T (torch.Tensor): Temporal collocation coordinates (grad enabled)
        g (float): PDE parameter
        k (float): PDE parameter
        residual_std (float): Standard deviation of the Gaussian that scores
            the residual; smaller values enforce the PDE harder

    Returns:
        loss (torch.Tensor): Unreduced loss, one value per point
        residual (torch.Tensor): PDE residual values (still attached to the graph)
    """
    mu, sig = model(X, T, g, k)
    u = get_u(mu, sig)

    # Log-density of the sample under the model's own predictive distribution.
    log_q = D.Normal(mu, sig).log_prob(u)

    residual = get_residual(u, X, T, g, k)
    # Log-density of a zero residual under a narrow Gaussian centred on the
    # actual residual; largest when the PDE is satisfied exactly.
    residual_dist = D.Normal(residual, torch.full_like(residual, residual_std))
    log_fit = residual_dist.log_prob(torch.zeros_like(residual))

    return log_q - log_fit, residual

In [None]:
# Problem setup: PDE parameters, space-time grid, and constraint targets
g = 2  # PDE parameter g: the residual scales the diffusion term by 1/g
k = 3  # PDE parameter k: enters the coefficient eta(u) = |(k^2 u + 1)/k|
N = 2**8  # number of spatial grid points (256)
x = np.linspace(0, 2*np.pi, N)  # x: N evenly spaced points on [0, 2*pi], endpoints included
t = np.linspace(0,10,600)  # t: 600 evenly spaced points on [0, 10]
inp = Input(x,t)
# Initial condition: all x at T = 0, target profile u = sin(x) + 1
Xic, Tic = inp.get_ic()
ic = (torch.sin(Xic) + 1).detach()  # detached: a fixed target, not part of the graph
# Boundary condition: both ends of x for every t after the first, target u = 1
# (the value sin(x) + 1 takes at x = 0 and, up to rounding, at x = 2*pi)
Xbc, Tbc = inp.get_bc()
bc = torch.ones_like(Xbc)
# Collocation points: interior x crossed with every t after the first
Xcolloc, Tcolloc = inp.get_colloc()

In [None]:
# Build the network (hidden widths 128-128-128-64), then move it to the selected device
alpha = AlphaNet(hidden_features=[128, 128, 128, 64])
alpha = alpha.to(device)
# Optional warm start from a saved checkpoint (disabled):
#alpha.load_state_dict(torch.load("alpha-0.5.pth",map_location=device))

In [None]:
# AdamW over all network parameters; `optimizer` is the handle the training loop uses
lr = 1e-3
optimizer_adamw = optim.AdamW(params=alpha.parameters(), lr=lr)
optimizer = optimizer_adamw

In [None]:
# Full-batch training: every epoch evaluates all IC, BC and collocation points once.
num_epochs = 7000
log_every = 100  # report every this many epochs
# Relative weights of the three loss terms (all equal by default)
w_ic, w_bc, w_colloc = 1, 1, 1
epoch_time = 0  # wall time accumulated since the last report
for epoch in range(num_epochs):
    epoch_start = time.perf_counter()
    alpha.train()
    # compute losses
    l1 = torch.mean(loss_ic(alpha, Xic, Tic, g, k, ic))  # initial condition loss
    l2 = torch.mean(loss_bc(alpha, Xbc, Tbc, g, k, bc))  # boundary condition loss
    l3, residual = loss_colloc(alpha, Xcolloc, Tcolloc, g, k)  # collocation loss and PDE residual
    l3 = torch.mean(l3)  # collocation loss
    # Mean squared residual is a diagnostic only (it is not part of `loss`),
    # so detach it instead of carrying an unused autograd graph.
    l4 = F.mse_loss(residual.detach(), torch.zeros_like(residual))
    loss = w_ic * l1 + w_bc * l2 + w_colloc * l3
    optimizer.zero_grad()
    # backward step
    loss.backward()
    optimizer.step()
    epoch_time += time.perf_counter() - epoch_start
    if epoch % log_every == 0:
        # "Time" is the training time accumulated since the previous report.
        print(f"Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f} | current_lr = {optimizer.param_groups[0]['lr']} | Time: {epoch_time:.2f} seconds")
        # .item() prints plain numbers rather than tensor(..., grad_fn=...) reprs.
        print(f"L1: {l1.item():.4f} | L2: {l2.item():.4f} | L3: {l3.item():.4f} | L4: {l4.item():.4e}")
        epoch_time = 0

In [None]:
# Evaluate the predicted mean on the spatial grid at a fixed time and plot it.
t_eval = 3  # time at which the solution is inspected
Xf = Xic
Tf = t_eval * torch.ones_like(Xf)
# Pure inference: no gradients are needed, so skip building the autograd graph.
with torch.no_grad():
    result, _ = alpha(Xf, Tf, g, k)
result_cpu = result.cpu().detach()
result_np = result_cpu.numpy()
print(result_np)
# Plot against the actual x coordinates rather than the sample index.
fig, ax = plt.subplots()
ax.plot(Xf.detach().cpu().numpy(), result_np)
ax.set(xlabel="x", ylabel="predicted mean u", title=f"AlphaNet mean prediction at t = {t_eval}")
plt.show()

In [None]:
#torch.save(alpha.state_dict(),"alpha-0.5.pth")