In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from typing import List, Tuple

import numpy as np

In [2]:
# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

print('Using device:', device)
if device.type == 'cuda':
    # Report which physical GPU (index 0) is being used.
    print(torch.cuda.get_device_name(0))

# Fix the random seeds so that weight initialisation and point sampling are repeatable.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

Using device: cuda
NVIDIA GeForce RTX 4090


In [19]:
class PINN(nn.Module):
    """
    Fully connected network used as a Physics-Informed Neural Network (PINN).

    Architecture: a linear input layer followed by tanh, then `num_hidden_layers`
    residual blocks of the form ``X <- sin(Linear(X)) + X``, then a linear output layer.
    """

    def __init__(self, input_dim:int=3, output_dim:int=1, num_hidden_layers:int=2, hidden_dim:int=32):
        """
        Initialize the Physics-Informed Neural Network (PINN).
        input_dim: Dimension of the input (e.g., x, y, z).
        output_dim: Dimension of the output (e.g., u).
        num_hidden_layers: Number of hidden (residual) layers in the network.
        hidden_dim: Number of neurons in each hidden layer.
        """
        super().__init__()

        # Keep the hyperparameters around; forward() uses input_dim to validate its input.
        self.input_dim = input_dim
        self.output_dim = output_dim
        self.num_hidden_layers = num_hidden_layers
        self.hidden_dim = hidden_dim

        self.input = nn.Linear(input_dim, hidden_dim)
        self.hidden_layers = nn.ModuleList([nn.Linear(hidden_dim, hidden_dim) for _ in range(num_hidden_layers)])
        self.output = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.Tanh()
        self.init_weights()

    def forward(self, X):
        """
        Forward pass through the network.
        X: (..., D) tensor whose last axis has size `input_dim`; the usual case is (B, D)
           with B the batch size, but a single point of shape (D,) or extra leading
           axes are accepted as well.
        Returns: (..., output_dim) tensor with the same leading axes as X.
        Raises: AssertionError if the last axis of X does not have size `input_dim`.
        """
        # Validate the feature (last) axis rather than axis 1: every layer below is an
        # nn.Linear, which operates on the last axis, so leading axes are free.
        assert X.dim() >= 1, "Input must have at least one dimension"
        assert X.shape[-1] == self.input_dim, f"Input dimension should be {self.input_dim}, but got {X.shape[-1]}"

        X = self.input(X)
        X = self.activation(X)
        for layer in self.hidden_layers:
            # Residual block with a sine nonlinearity.
            # (Alternative previously tried: self.activation(layer(X)) + X, i.e. tanh.)
            X = torch.sin(layer(X)) + X
        X = self.output(X)
        return X

    def init_weights(self):
        """
        Initialize every linear layer with Xavier-uniform weights and zero biases.

        Note: only the input layer is followed by tanh; the hidden residual blocks
        apply sin (see forward()), and the same initialization is used for all layers.
        """
        nn.init.xavier_uniform_(self.input.weight)
        nn.init.zeros_(self.input.bias)
        for layer in self.hidden_layers:
            nn.init.xavier_uniform_(layer.weight)
            nn.init.zeros_(layer.bias)
        nn.init.xavier_uniform_(self.output.weight)
        nn.init.zeros_(self.output.bias)

    def compute_derivatives(self, X: torch.Tensor):
        """
        Placeholder for derivative computation; not implemented yet.
        X: input tensor (currently unused).
        Returns: always None.
        """
        # TODO: implement (e.g. via torch.autograd.grad) once the PDE residual is defined.
        return None

In [20]:
# Simple closed-form target functions used to test the network's ability to fit.
def sum_square(X: torch.Tensor) -> torch.Tensor:
    """
    Sum of squared entries along dim 1.
    X: tensor with at least two dimensions, e.g. (B, D).
    Returns: tensor in which dim 1 is reduced to size 1, e.g. (B, 1).
    """
    # keepdim=True leaves the reduced axis in place as a size-1 axis.
    return X.mul(X).sum(dim=1, keepdim=True)

def prod_sin(X: torch.Tensor, k:float = torch.pi) -> torch.Tensor:
    """
    Product over dim 1 of sin(k * x).
    X: tensor with at least two dimensions, e.g. (B, D).
    k: scalar factor applied to X before the sine (defaults to pi).
    Returns: tensor in which dim 1 is reduced to size 1, e.g. (B, 1).
    """
    waves = torch.sin(k * X)
    # keepdim=True leaves the reduced axis in place as a size-1 axis.
    return waves.prod(dim=1, keepdim=True)

In [21]:
def generate_training_points(n_interior: int = 1024, n_boundary: int = 1024, dim:int = 3) -> Tuple[torch.Tensor, ...]:
    """
    Sample random training points on the hypercube [-1,1]^dim.

    Interior points are drawn uniformly from the cube. Boundary points start as
    uniform samples as well; then, for each point, one randomly chosen coordinate
    is overwritten with -1 or +1 so the point lies on a face of the cube.

    n_interior: Number of interior points.
    n_boundary: Number of boundary points.
    dim: Dimension of the input space.
    Returns: (interior, boundary) tensors of shape (n_interior, dim) and
             (n_boundary, dim), both moved to the module-level `device`.
    """
    # Sampling happens on the CPU; rand() in [0, 1) is mapped affinely to [-1, 1).
    interior = 2.0 * torch.rand(n_interior, dim) - 1.0

    boundary = 2.0 * torch.rand(n_boundary, dim) - 1.0
    # For every boundary point: which axis gets pinned ...
    pinned_axis = torch.randint(0, dim, (n_boundary,))  # (B,)
    # ... and to which side: {0, 1} -> {-1.0, +1.0}.
    pinned_value = torch.randint(0, 2, (n_boundary,)).float() * 2 - 1  # (B,)
    # Row r receives pinned_value[r] in column pinned_axis[r].
    boundary.scatter_(1, pinned_axis.unsqueeze(1), pinned_value.unsqueeze(1))

    return interior.to(device), boundary.to(device)

In [25]:
# --- Run configuration ---------------------------------------------------------
epochs = 10000        # number of full-batch optimisation steps
output_every = 1000   # print the loss once every this many epochs

# Size of the training set and dimensionality of the domain.
n_interior = 4096
n_boundary = 4096
dim = 3

lr = 1e-3   # initial AdamW learning rate (annealed by the cosine schedule below)
w_i = 1.0   # weight of the interior term in the total loss
w_b = 1.0   # weight of the boundary term in the total loss

# Re-seed so weight initialisation and point sampling repeat whenever this cell is re-run.
torch.manual_seed(0)
torch.cuda.manual_seed(0)

# --- Model and optimisation setup ----------------------------------------------
model = PINN(input_dim=dim, output_dim=1, num_hidden_layers=8, hidden_dim=32).to(device)
optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-5)
scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs, eta_min=1e-6)
criterion = nn.MSELoss()

# --- Training data --------------------------------------------------------------
# generate_training_points already returns tensors on `device`, and the targets are
# computed from those tensors, so no further device transfer is needed here.
coords_i, coords_b = generate_training_points(n_interior=n_interior, n_boundary=n_boundary, dim=dim)

# Target function: product of sines. (sum_square can be swapped in here to fit a
# polynomial target instead.)
y_i = prod_sin(coords_i, k=torch.pi)
y_b = prod_sin(coords_b, k=torch.pi)

print(f"Interior points shape: {coords_i.shape}")
print(f"Interior points target shape: {y_i.shape}")
print(f"Boundary points shape: {coords_b.shape}")
print(f"Boundary points target shape: {y_b.shape}")

# --- Training loop (full batch) -------------------------------------------------
for epoch in range(epochs):
    optimizer.zero_grad()

    # Predictions on both point sets.
    out_i = model(coords_i)
    out_b = model(coords_b)

    # Total loss is the weighted sum of the two root-mean-square errors.
    loss_i = criterion(out_i, y_i)
    loss_b = criterion(out_b, y_b)
    loss = w_i * loss_i.sqrt() + w_b * loss_b.sqrt()

    loss.backward()
    # Cap the global gradient norm at 1.0 before the parameter update.
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
    optimizer.step()
    scheduler.step()

    if epoch % output_every == 0:
        print(f"Epoch {epoch}, Training Loss: {loss.item():.6f}")

Interior points shape: torch.Size([4096, 3])
Interior points target shape: torch.Size([4096, 1])
Boundary points shape: torch.Size([4096, 3])
Boundary points target shape: torch.Size([4096, 1])
Epoch 0, Training Loss: 4.882177
Epoch 1000, Training Loss: 0.067167
Epoch 2000, Training Loss: 0.060663
Epoch 3000, Training Loss: 0.038946
Epoch 4000, Training Loss: 0.026490
Epoch 5000, Training Loss: 0.025903
Epoch 6000, Training Loss: 0.012183
Epoch 7000, Training Loss: 0.011531
Epoch 8000, Training Loss: 0.008278
Epoch 9000, Training Loss: 0.006879


In [26]:
# Second optimisation stage: continue training the same model with L-BFGS,
# using the same loss as the AdamW stage.
optimizer = optim.LBFGS(
    model.parameters(),
    lr=1e-1,
    max_iter=20,
    max_eval=None,
    tolerance_grad=1e-7,
    tolerance_change=1e-9,
    history_size=100,
)

def closure():
    """
    Recompute the training loss and its gradients for the L-BFGS optimizer.
    Returns: the scalar loss tensor (weighted sum of the interior and boundary RMSE).
    """
    optimizer.zero_grad()
    rmse_i = criterion(model(coords_i), y_i).sqrt()
    rmse_b = criterion(model(coords_b), y_b).sqrt()
    total = w_i * rmse_i + w_b * rmse_b
    total.backward()
    return total

# Run the LBFGS optimizer; report the loss returned by step() every 10th outer iteration.
for i in range(1000):
    loss = optimizer.step(closure)
    if i % 10 == 0:
        print(f"LBFGS Iteration {i}, Training Loss: {loss.item():.6f}")
        

LBFGS Iteration 0, Training Loss: 0.006482
LBFGS Iteration 10, Training Loss: 0.004522
LBFGS Iteration 20, Training Loss: 0.003680
LBFGS Iteration 30, Training Loss: 0.003051
LBFGS Iteration 40, Training Loss: 0.002611
LBFGS Iteration 50, Training Loss: 0.002240
LBFGS Iteration 60, Training Loss: 0.002033
LBFGS Iteration 70, Training Loss: 0.001882
LBFGS Iteration 80, Training Loss: 0.001735
LBFGS Iteration 90, Training Loss: 0.001589
LBFGS Iteration 100, Training Loss: 0.001466
LBFGS Iteration 110, Training Loss: 0.001381
LBFGS Iteration 120, Training Loss: 0.001299
LBFGS Iteration 130, Training Loss: 0.001217
LBFGS Iteration 140, Training Loss: 0.001159
LBFGS Iteration 150, Training Loss: 0.001116
LBFGS Iteration 160, Training Loss: 0.001076
LBFGS Iteration 170, Training Loss: 0.001039
LBFGS Iteration 180, Training Loss: 0.001005
LBFGS Iteration 190, Training Loss: 0.000980
LBFGS Iteration 200, Training Loss: 0.000951
LBFGS Iteration 210, Training Loss: 0.000926
LBFGS Iteration 220, 