In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd 
import numpy as np
import matplotlib.pyplot as plt

import time

In [2]:
# torch.manual_seed(42)

In [3]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

# device = 'cpu'

print(f'device: {device}')

device: cuda:0


In [4]:
class Sequentialmodel(nn.Module):
    
    def __init__(self,layers):
        super().__init__() #call __init__ from parent class 
              
    
        self.activation = nn.Tanh()
        self.loss_function = nn.MSELoss(reduction ='mean')

        self.layers = layers
        
        'Initialise neural network as a list using nn.Modulelist'  
        self.linears = nn.ModuleList([nn.Linear(layers[i], layers[i+1]) for i in range(len(layers)-1)])
        
        for i in range(len(layers)-1):
            nn.init.xavier_normal_(self.linears[i].weight.data, gain=1.0)
            # set biases to zero
            nn.init.zeros_(self.linears[i].bias.data)

        self.H1 = self.linears[0]

        
    'forward pass'
    def forward(self,x,y,t):              
        
        # for i in range(len(self.layers)-2):
        #     z = self.linears[i](a)
        #     a = self.activation(z)

        a = torch.cat([x,y,t], dim = 1)    #(N,3)

        for i in range(len(self.layers)-2):
            z = self.linears[i](a)
            a = self.activation(z)


            
        b = self.linears[-1](a) 
         
        return b
    
    def forward_direct(self, x,y,t,keep=("x","y","t")):
        
        z = torch.cat([x,y,t], dim = 1)    #(N,3)  

        dic = {"x":0, "y":1, "t":2}  
        idx = [dic[k] for k in keep]

        N = z.shape[0]   


        W1 = self.linears[0].weight    # (20,3)

        H = W1[:, idx].unsqueeze(0)      # (1,20,3)
        F = torch.zeros_like(H)          # (1,20,3)



        

        for i in range(len(self.layers)-2):
            L = self.linears[i](z)            # (N,20)
            z = self.activation(L)            # (N,20)

            z_1 = 1.0-torch.square(z)         # (N,20)
            z_2 = -2*z + 2*torch.pow(z,3)     # (N,20)

            z_1 = z_1.unsqueeze(-1)           # (N,20,1)
            z_2 = z_2.unsqueeze(-1)           # (N,20,1)

            if H.shape[0] == 1:
                H = H.expand(N, -1, -1)                    # (N,20,3)
                F = F.expand(N, -1, -1)                    # (N,20,3)

            G = z_1*H    #\sigma'(L)*H   # (N,20,3)
            
 
            C = z_2*H*H                  # (N,20,3)


            E = C + z_1*F                # (N,20,3)

            W_next = self.linears[i+1].weight.t()       # (20,20)

            H = torch.einsum('nhk,hp->npk', G, W_next)  # (N,20,3)
            F = torch.einsum('nhk,hp->npk', E, W_next)  # (N,20,3)


        z = self.linears[-1](z)                         # (N,1)
         
        # return z, H_x, H_y, H_t, F_x, F_y, F_t
        return z, H, F                        # H - (N,1,3), F - (N,1,3)


    # # TENSORIZED
    
    
    # def forward_direct(self, x,t):
        
    #     z = torch.cat([x,t], dim = 1)    #(N,2)      N = 90000
    #     N = z.size(0)

    #     W1 = self.linears[0].weight    # (20,2)

    #     H = W1.unsqueeze(0).expand(N,-1,-1).contiguous()     #(N,20,2)
    #     F = torch.zeros_like(H)

    #     # H_x = W1[:, 0].unsqueeze(0)      # (1,20)
    #     # F_x = torch.zeros_like(H_x)

    #     # H_t = W1[:, 1].unsqueeze(0)      # (1,20)
    #     # F_t = torch.zeros_like(H_t)
        

    #     for i in range(len(self.layers)-2):
    #         L = self.linears[i](z)            # (N,20)
    #         z = self.activation(L)            # (N,20)
    #         z_1 = 1.0-torch.square(z)         # (N,20)
    #         z_1 = z_1.unsqueeze(-1)           # (N,20,1)

    #         G = z_1*H     #\sigma'(L)*H   # (N,20,2)

    #         # G_x = z_1*H_x     #\sigma'(L)*H   # (N,20)
    #         # G_t = z_1*H_t                     # (N,20)

    #         z_2 = (-2*z + 2*torch.pow(z,3))   #(N,20)
    #         z_2 = z_2.unsqueeze(-1)           # (N,20,1)

    #         C = z_2*H*H                       # (N,20,2)




    #         # C_x = (-2*z + 2*torch.pow(z,3))*H_x*H_x
    #         # C_t = (-2*z + 2*torch.pow(z,3))*H_t*H_t

    #         E = C + z_1*F                     # (N,20,2)     

    #         # E_x = C_x + z_1*F_x    # (N,50)
    #         # E_t = C_t + z_1*F_t    # (N,50)

    #         H = torch.matmul(G.transpose(1, 2),self.linears[i+1].weight.t()).transpose(1, 2)     # (N,2,20) * (20,1)  = (N,2,1).t() = (N,1,2)
    #         F = torch.matmul(E.transpose(1, 2),self.linears[i+1].weight.t()).transpose(1, 2)     # (N,1,2)


    #         # H_x = torch.matmul(G_x,self.linears[i+1].weight.t())
    #         # F_x = torch.matmul(E_x,self.linears[i+1].weight.t())

    #         # H_t = torch.matmul(G_t,self.linears[i+1].weight.t())
    #         # F_t = torch.matmul(E_t,self.linears[i+1].weight.t())

    #     z = self.linears[-1](z)

    #     H_x = H[:, 0, 0].unsqueeze(-1)   # (N,1)
    #     H_t = H[:, 0, 1].unsqueeze(-1)   # (N,1)
    #     F_x = F[:, 0, 0].unsqueeze(-1)   # (N,1)
    #     F_t = F[:, 0, 1].unsqueeze(-1)   # (N,1)
         
    #     return z, H_x, H_t, F_x, F_t
    


In [5]:
# layers = np.array([2,50,50,50,50,50,1])
layers = np.array([3,20,20,20,20,20,1])
# PINN = Sequentialmodel(layers).to(device)

In [None]:
# Resetting to ensure the reported peak truly reflects the training loop, rather than including earlier setup.

# if device.type == 'cuda':
#     torch.cuda.reset_peak_memory_stats(device)

In [6]:
# Create the training data

x = torch.linspace(0,1,150).view(-1,1)
y = torch.linspace(0,1,150).view(-1,1)
t = torch.linspace(0,5,150).view(-1,1)


if torch.is_tensor(x) != True:         
    x = torch.from_numpy(x)  
if torch.is_tensor(y) != True:         
    y = torch.from_numpy(y) 
if torch.is_tensor(t) != True:         
    t = torch.from_numpy(t) 

#convert to float
x = x.float()
y = y.float()
t = t.float()

    
x_train,y_train,t_train = torch.meshgrid(x.squeeze(),y.squeeze(),t.squeeze(), indexing = 'xy')
x_train = x_train.reshape(-1,1).to(device)     
y_train = y_train.reshape(-1,1).to(device) 
t_train = t_train.reshape(-1,1).to(device)     

# x_train = x_train.reshape(-1,1)     
# y_train = y_train.reshape(-1,1) 
# t_train = t_train.reshape(-1,1)     



In [7]:
def pde_residual(x,y,t, alpha):
    u = PINN(x,y,t)

    # z, H_x, H_y, H_t, F_x, F_y, F_t = PINN.forward_direct(x,y,t)
    z, H, F = PINN.forward_direct(x,y,t, keep=("x","y","t"))


    du_dt = H['t']              # (N,1)
    du_dx_x = F['x']            # (N,1)
    du_dy_y = F['y']            # (N,1)
    # du_dt_t = F_t

    res_pde = du_dt - alpha * (du_dx_x + du_dy_y)


    return res_pde

In [8]:
def initial_condition(x,y):
  u_ic = PINN(x, y, torch.zeros_like(x))
  res_ic = u_ic - ((torch.sin(np.pi * x))*(torch.sin(np.pi * y)))
  return res_ic

In [9]:
def boundary_condition(x,y,t):
    u_left = PINN(torch.full_like(t, 0),y, t)
    u_right = PINN(torch.full_like(t, 1),y, t)

    u_bottom = PINN(x,torch.full_like(t, 0), t)
    u_top = PINN(x,torch.full_like(t, 1), t)

    res_left = u_left - torch.zeros_like(t)
    res_right = u_right - torch.zeros_like(t)
    res_bottom = u_bottom - torch.zeros_like(t)
    res_top = u_top - torch.zeros_like(t)

    return res_left, res_right,res_bottom, res_top

In [10]:
def compute_losses():
   res_pde = pde_residual(x_train, y_train, t_train, alpha = 0.01) 
   res_ic = initial_condition(x_train,y_train)
   res_left, res_right,res_bottom, res_top = boundary_condition(x_train, y_train, t_train)

   loss_pde = torch.mean(res_pde**2)
   loss_ic = torch.mean(res_ic**2)
   loss_bc = torch.mean(res_left**2) + torch.mean(res_right**2) + torch.mean(res_bottom**2) + torch.mean(res_top**2)

   total_loss = loss_pde + loss_ic + loss_bc

   return total_loss

In [None]:
# optimizer = torch.optim.Adam(PINN.parameters(), lr=0.01)

In [None]:
# No. of epochs


# start_time = time.time()

# num_epochs = 10000



# for epoch in range(num_epochs):
#     optimizer.zero_grad()

#     total_loss = compute_losses()

    
#     total_loss.backward()

#     optimizer.step()

#     if (epoch) % 200 == 0:
#      print(f'Epoch {epoch}, Loss: {total_loss.item()}')


# end_time = time.time()

# print(f'Total Training Time: {(end_time - start_time): .4f}seconds')


    








In [11]:
torch.manual_seed(42)
PINN = Sequentialmodel(layers).to(device)
# PINN = Sequentialmodel(layers)

In [12]:
if device.type == 'cuda':
    torch.cuda.reset_peak_memory_stats(device)

In [None]:
# optimizer = torch.optim.Adam(PINN.parameters(), lr=0.01)

In [None]:
# # Threshold loss as the stopping criteria

# max_epochs = 15000
# threshold = 0.002



# start_time = time.time()

# ep = 0
# while ep < max_epochs:
#     optimizer.zero_grad()

#     total_loss = compute_losses()

    
#     total_loss.backward()

#     optimizer.step()


#     if total_loss.item() < threshold:
#         print(f"Reached threshold loss {threshold} at epoch {ep}")
#         break

#     if (ep) % 200 == 0:
#      print(f'Epoch {ep}, Loss: {total_loss.item()}')

#     ep += 1


# print(f"Training stopped at epoch {ep}, total time {time.time() - start_time:.2f} s")





In [13]:
# Using LBFGS

optimizer = torch.optim.LBFGS(PINN.parameters(), lr=0.05,max_iter=20,history_size=50,tolerance_grad=1e-9,tolerance_change=1e-9,line_search_fn='strong_wolfe')

max_outer_steps = 15000
threshold = 0.002

start_time = time.time()
ep = 0


def closure():

    optimizer.zero_grad()
    total_loss = compute_losses()
    total_loss.backward()

    return total_loss

while ep < max_outer_steps:

    total_loss = optimizer.step(closure)

    if total_loss.item() < threshold:
        print(f"Reached threshold loss {threshold} at outer step {ep}")
        break

    if ep % 200 == 0:
        print(f'Outer {ep}, Loss: {total_loss.item()}')

    ep += 1

print(f"Training stopped at outer step {ep}, total time {time.time() - start_time:.2f} s")
    

OutOfMemoryError: CUDA out of memory. Tried to allocate 774.00 MiB. GPU 0 has a total capacity of 31.48 GiB of which 3.56 MiB is free. Process 218242 has 26.19 GiB memory in use. Including non-PyTorch memory, this process has 5.26 GiB memory in use. Of the allocated memory 4.66 GiB is allocated by PyTorch, and 243.68 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
# Memory usage after training

if device.type == 'cuda':
    peak_mem = torch.cuda.max_memory_allocated(device)
    print(f'Peak GPU Memory Usage: {peak_mem / 1e6: .2f} MB')