Imports

In [1]:
import f90nml
import numpy as np
from pint import UnitRegistry; AssignQuantity = UnitRegistry().Quantity
import os
import reference_solution as refsol
from scipy.fft import rfft
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import icepinn as ip

# Use double precision for every tensor created from here on.
torch.set_default_dtype(torch.float64)
print(torch.cuda.device_count())

# The notebook's device is whatever ip.get_device() returns. A locally detected
# device used to be printed here and was then immediately overwritten by this
# call, so the printed device could differ from the one actually used.
# Printing the value that is kept avoids that mismatch.
device = ip.get_device()
print(device)

cpu
0
mps


  return torch.tensor(NBAR - NSTAR*torch.sin(2*np.pi*Ntot)).to(device)


In [2]:
# Load the 'GI' group of the namelist file and pull out the values used below
inputfile = "GI parameters - Reference limit cycle (for testing).nml"
GI = f90nml.read(inputfile)['GI']
nx_crystal, L, NBAR, NSTAR = (
    GI[key] for key in ('nx_crystal', 'L', 'Nbar', 'Nstar')
)

# Time range (the original note gives the unit as "MS" and says it needs to be
# the same as in the training file)
RUNTIME = 2
NUM_T_STEPS = RUNTIME*100 + 1
# NUM_T_STEPS = RUNTIME*5 + 1

# Initial conditions: Ntot is all ones, one value per x grid point;
# Nqll is derived from it by ip.get_Nqll
Ntot_init = torch.ones(nx_crystal, device=device)
Nqll_init = ip.get_Nqll(Ntot_init)

# Build every (x, t) pair on the grid; each row of training_set is one pair
X_QLC = np.linspace(-L, L, num=nx_crystal)
t_points = np.linspace(0, RUNTIME, num=NUM_T_STEPS)
x, t = np.meshgrid(X_QLC, t_points)
xt_pairs = np.stack((x.ravel(), t.ravel()), axis=1)
training_set = torch.tensor(xt_pairs).to(device)

## Naming Legend

(This is just the notation I used for my saved models; use whatever works for you. Just make sure you record the hyperparameters and architecture somewhere.)

CL = curriculum learning  
SF = SF_Pinn architecture  
HE = hard-enforced initial condition    
SE = soft-enforced initial condition 
{n}wide = nodes per FC-layer  
nodiff = diffusion term is excluded  
LBFGS = LBFGS was used  
SGD = SGD with standard momentum  
Nesterov = SGD with Nesterov momentum   
AdamW was used if optimizer is unspecified  
{n}rt = trained on RUNTIME of {n}   
{n}x = {n}*RUNTIME + 1 timesteps in training set    


In [3]:
# Name used to save/load this model. Following the Naming Legend, it encodes:
# HE (hard-enforced IC), 128 nodes per layer, no diffusion term, RUNTIME of 2,
# and 100*RUNTIME + 1 timesteps. Keep it in sync with the settings below.
MODEL_NAME = "HE_128wide_nodiff_2rt_100x"

# Hard enforce IC? (soft-enforced otherwise)
hard_enforce_IC = True
# Curriculum learning? (Only relevant for HE IC)
curriculum_learning = False
# Pre-load stage 1 model (and use it instead of stage 1 training)?
#   - For CL (curriculum learning), this will be the pre IC enforced model
#   - For models fine-tuned with L-BFGS, this will be the pre L-BFGS model
preload = False
# Use L-BFGS after initial optimization?
LBFGS = False

# Define model attributes
# model_dimensions and is_sf_PINN are tensors (rather than plain Python values)
# because both are later registered on the model as buffers.
model_dimensions = torch.tensor([8, 128]).to(device) # [Num hidden layers, Nodes per layer]
is_sf_PINN = torch.tensor(False)
# Passed as `diffusion=` to training and to the loss; "nodiff" in MODEL_NAME corresponds to False
diffusion = False

In [5]:
# Build the network from the configured architecture.
# Unpacking the 1-D tensor yields the same 0-dim tensors as indexing [0] and [1].
n_hidden_layers, nodes_per_layer = model_dimensions
model = ip.IcePINN(
    num_hidden_layers=n_hidden_layers,
    hidden_layer_size=nodes_per_layer,
    is_sf_PINN=is_sf_PINN.item(),
)
model = model.to(device)

# Store the architecture settings on the model as buffers so they are
# saved and loaded together with it
for buffer_name, buffer_value in (
    ('dimensions', model_dimensions),
    ('is_sf_PINN', is_sf_PINN),
):
    model.register_buffer(buffer_name, buffer_value)

# Apply ip.init_HE to the model (HE weight initialization, per the original note)
model.apply(ip.init_HE)

# # Learning rate scheduling scheme (currently disabled)
# scheduler_summed = optim.lr_scheduler.ReduceLROnPlateau(
#         optimizer, mode='min', factor=0.5, patience=10000
#     )

IcePINN(
  (sml): SinusoidalMappingLayer()
  (post_sml): Linear(in_features=384, out_features=128, bias=True)
  (sin): SinActivation()
  (fc_in): Linear(in_features=2, out_features=128, bias=True)
  (post_fc_in): Linear(in_features=128, out_features=128, bias=True)
  (fc_hidden): ModuleList(
    (0-5): 6 x Linear(in_features=128, out_features=128, bias=True)
  )
  (fc_out): Linear(in_features=128, out_features=2, bias=True)
)

In [6]:
# Sanity check before training: model name, training-set shape, shape of the
# loss output, shape of the IC-enforced model output, and the architecture.
# One item per line, in the same order as before.
print(
    MODEL_NAME,
    training_set.shape,
    ip.calc_IcePINN_loss(model, training_set, hard_enforce_IC=hard_enforce_IC).shape,
    ip.enforced_model(training_set, model).shape,
    model,
    sep="\n",
)

HE_128wide_nodiff_2rt_100x
torch.Size([64320, 2])
torch.Size([64320, 2])
torch.Size([64320, 2])
IcePINN(
  (sml): SinusoidalMappingLayer()
  (post_sml): Linear(in_features=384, out_features=128, bias=True)
  (sin): SinActivation()
  (fc_in): Linear(in_features=2, out_features=128, bias=True)
  (post_fc_in): Linear(in_features=128, out_features=128, bias=True)
  (fc_hidden): ModuleList(
    (0-5): 6 x Linear(in_features=128, out_features=128, bias=True)
  )
  (fc_out): Linear(in_features=128, out_features=2, bias=True)
)


  return torch.tensor(NBAR - NSTAR*torch.sin(2*np.pi*Ntot)).to(device)


Train model

In [7]:
# Curriculum learning, stage 1: obtain a model with the IC not enforced,
# either by loading a saved one or by training it here.
if curriculum_learning and preload:
    # Reuse the saved pre-IC model instead of training stage 1 again
    model = ip.load_IcePINN(MODEL_NAME, pre_IC=True)
    model.train()
elif curriculum_learning:
    # Train from the current weights with enforce_IC switched off
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
    stage_one_args = dict(
        model=model,
        optimizer=optimizer,
        training_set=training_set,
        name=MODEL_NAME,
        diffusion=diffusion,
        LR_scheduler=None,
        enforce_IC=False,
        epochs=100_000,
        print_every=1_000,
    )
    ip.train_IcePINN(**stage_one_args)


In [9]:
# AdamW drives this stage. The SGD + Nesterov alternative is kept for reference;
# per the original note it has exploding gradients with LR >= 1e-5.
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
#optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, nesterov=True)

# Keyword arguments shared by both training variants below
common_train_args = dict(
    model=model,
    optimizer=optimizer,
    training_set=training_set,
    name=MODEL_NAME,
    diffusion=diffusion,
    LR_scheduler=None,
    enforce_IC=True,
)

if curriculum_learning:
    # Gradually enforce IC over adjustment_period and keep it enforced
    ip.train_IcePINN(
        epochs=200_000,
        print_every=1_000,
        adjustment_period=100_000,
        **common_train_args)
elif not preload:
    # No curriculum: train normally with IC enforced and no adjustment period
    ip.train_IcePINN(
        epochs=100_000,
        print_every=10,
        hard_enforce_IC=hard_enforce_IC,
        adjustment_period=0,
        **common_train_args)

Commencing PINN training on 64320 points for 100000 epochs.
IC is enforced with an adjustment period of 0.
Epoch [0.01k/100k] at 0m 23s: Ntot = 59793.789, Nqll = 13828.994, LR = 1e-05
Epoch [0.02k/100k] at 0m 51s: Ntot = 55388.878, Nqll = 11983.685, LR = 1e-05
Epoch [0.03k/100k] at 1m 15s: Ntot = 50172.910, Nqll = 10876.886, LR = 1e-05
Epoch [0.04k/100k] at 1m 44s: Ntot = 43560.279, Nqll = 9814.727, LR = 1e-05
Epoch [0.05k/100k] at 2m 9s: Ntot = 39053.604, Nqll = 9370.179, LR = 1e-05
Epoch [0.06k/100k] at 2m 32s: Ntot = 36756.448, Nqll = 8846.043, LR = 1e-05
Epoch [0.07k/100k] at 2m 54s: Ntot = 32955.010, Nqll = 8440.046, LR = 1e-05
Epoch [0.08k/100k] at 3m 15s: Ntot = 32603.463, Nqll = 8275.501, LR = 1e-05
Epoch [0.09k/100k] at 3m 41s: Ntot = 34667.160, Nqll = 8672.254, LR = 1e-05
Epoch [0.1k/100k] at 4m 3s: Ntot = 34698.036, Nqll = 8148.263, LR = 1e-05
Epoch [0.11k/100k] at 4m 26s: Ntot = 35088.900, Nqll = 8101.458, LR = 1e-05
Epoch [0.12k/100k] at 4m 46s: Ntot = 35505.406, Nqll = 85

KeyboardInterrupt: 

In [None]:
# Load best model for future training (LBFGS)
# NOTE: this rebinds `model`, so an optimizer created earlier from the previous
# model's parameters (e.g. `optimizer`) presumably no longer tracks this model.
# Build a new optimizer before training the reloaded model further.
model = ip.load_IcePINN(MODEL_NAME)
model.train()

Prepare for L-BFGS optimization

In [None]:
# Optimizer for the L-BFGS stage
lbfgs_settings = dict(lr=0.1, max_iter=200, history_size=50)
lbfgs_optim = torch.optim.LBFGS(model.parameters(), **lbfgs_settings)

# Extra arguments passed to ip.calc_IcePINN_loss inside closure()
misc_params = ip.get_misc_params()

# Counter and print interval intended for throttling closure()'s log output
lbfgs_iter_counter, lbfgs_print_freq = 0, 1

# closure() is called by L-BFGS when you call step() up to max_iter times
def closure():
    """Evaluate the loss and its gradients for one L-BFGS iteration.

    Zeroes the gradients held by `lbfgs_optim`, computes the loss from
    `ip.calc_IcePINN_loss` on `training_set`, backpropagates the sum of all
    loss entries, clips the gradient norm, and logs the summed loss of
    column 0 (Ntot) and column 1 (Nqll) every `lbfgs_print_freq` calls.

    Returns:
        A 0-dim tensor holding the sum of every loss entry. L-BFGS needs a
        single scalar loss from its closure.
    """
    # Without `global`, the += below would raise UnboundLocalError.
    global lbfgs_iter_counter
    lbfgs_iter_counter += 1

    lbfgs_optim.zero_grad()
    loss = ip.calc_IcePINN_loss(model, training_set, misc_params, diffusion, hard_enforce_IC=hard_enforce_IC)

    # Backpropagating the sum is equivalent to loss.backward(torch.ones_like(loss)).
    total_loss = loss.sum()
    total_loss.backward()

    # Gradient clipping to mitigate exploding gradients
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.1, norm_type=2)
    #nn.utils.clip_grad_value_(model.parameters(), clip_value=1.0)

    # A print frequency of 0 disables logging instead of dividing by zero.
    if lbfgs_print_freq and lbfgs_iter_counter % lbfgs_print_freq == 0:
        Ntot_loss = torch.sum(loss[:, 0]).item()
        Nqll_loss = torch.sum(loss[:, 1]).item()
        print(f"L-BFGS iteration loss: Ntot = {Ntot_loss:.3f}, Nqll = {Nqll_loss:.3f}")

    # The previous torch.add(Ntot_loss, Nqll_loss) was called on two Python
    # floats, which torch.add rejects. Return the scalar tensor instead.
    return total_loss.detach()

L-BFGS time

In [None]:
if LBFGS:
    # lbfgs_iter_counter = 0
    # lbfgs_print_freq = 1
    # Each step() call lets L-BFGS invoke closure() repeatedly (up to the
    # optimizer's max_iter), so one "epoch" here is one step() call.
    epochs = 10
    for e in range(epochs):
        print(f"Epoch {e+1}")
        lbfgs_optim.step(closure)

    save_path = './models/'+MODEL_NAME

    # Create folder to store model in if necessary. exist_ok=True replaces the
    # separate os.path.exists() check, which could race with another process
    # creating the same folder.
    os.makedirs(save_path, exist_ok=True)

    # Save model (not including initial condition wrapper)
    # torch.save(model.state_dict(), save_path+'/post_LBFGS_params.pth')
    # TODO - update (load) so that it can load LBFGS results
    print("Done!")


In [None]:
# print(training_set[0:20])

Train some more if needed?

In [None]:
# ip.train_IcePINN(
#     model=model, 
#     optimizer=optimizer, 
#     training_set=training_set, 
#     epochs=100_000, 
#     name=MODEL_NAME, 
#     print_every=1_000, 
#     diffusion=diffusion,
#     LR_scheduler=None,
#     enforce_IC=True,
#     adjustment_period=0)