Imports

In [1]:
import f90nml
import numpy as np
from pint import UnitRegistry; AssignQuantity = UnitRegistry().Quantity
import os
import reference_solution as refsol
from scipy.fft import rfft
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import icepinn as ip

torch.set_default_dtype(torch.float64)
print(torch.cuda.device_count())
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

device = ip.get_device()

cuda
1
cuda


  return torch.tensor(NBAR - NSTAR*torch.sin(2*np.pi*Ntot)).to(device)


In [2]:
# Read in GI parameters
inputfile = "GI parameters - Reference limit cycle (for testing).nml"
GI=f90nml.read(inputfile)['GI']
nx_crystal = GI['nx_crystal']
L = GI['L']
NBAR = GI['Nbar']
NSTAR = GI['Nstar']

# Define t range (needs to be same as training file)
RUNTIME = 5
NUM_T_STEPS = RUNTIME + 1
#NUM_T_STEPS = RUNTIME*5 + 1

# Define initial conditions
Ntot_init = torch.ones(nx_crystal).to(device)
Nqll_init = ip.get_Nqll(Ntot_init)

# Define x, t pairs for training
X_QLC = np.linspace(-L,L,nx_crystal)
t_points = np.linspace(0, RUNTIME, NUM_T_STEPS)
x, t = np.meshgrid(X_QLC, t_points)
training_set = torch.tensor(np.column_stack((x.flatten(), t.flatten()))).to(device)

## Naming Legend

CL = curriculum learning  
SF = SF_Pinn architecture  
HE = hard-enforced initial condition    
SE = soft-enforced initial condition 
{n}wide = nodes per FC-layer  
nodiff = diffusion term is excluded  
LBFGS = LBFGS was used  
SGD = SGD with standard momentum  
Nesterov = SGD with Nesterov momentum


In [3]:
MODEL_NAME = "SE_128wide_nodiff_Nesterov_LBFGS"

# Hard enforce IC? (soft-enforced otherwise)
hard_enforce_IC = False
# Curriculum learning? (Only relevant for HE IC)
curriculum_learning = False
# Pre-load non-IC-enforced model? (CR only)
preload = False
# Use L-BFGS after initial optimization?
LBFGS = True

# Define model attributes
model_dimensions = torch.tensor([8, 128]).to(device) # [Num hidden layers, Nodes per layer]
is_sf_PINN = torch.tensor(False)
diffusion = False

In [4]:
# instantiate model
model = ip.IcePINN(
	num_hidden_layers=model_dimensions[0], 
	hidden_layer_size=model_dimensions[1],
	is_sf_PINN=is_sf_PINN.item()).to(device)

# Attach model attributes as buffers so they can be saved and loaded
model.register_buffer('dimensions', model_dimensions)
model.register_buffer('is_sf_PINN', is_sf_PINN)

# Initialize model weights with HE initialization
model.apply(ip.init_HE)

# # Define learning rate scheduling scheme
# scheduler_summed = optim.lr_scheduler.ReduceLROnPlateau(
#         optimizer, mode='min', factor=0.5, patience=10000
#     )

IcePINN(
  (sml): SinusoidalMappingLayer()
  (post_sml): Linear(in_features=384, out_features=128, bias=True)
  (sin): SinActivation()
  (fc_in): Linear(in_features=2, out_features=128, bias=True)
  (post_fc_in): Linear(in_features=128, out_features=128, bias=True)
  (fc_hidden): ModuleList(
    (0-5): 6 x Linear(in_features=128, out_features=128, bias=True)
  )
  (fc_out): Linear(in_features=128, out_features=2, bias=True)
)

In [5]:
print(MODEL_NAME)
print(training_set.shape)
print(ip.calc_cp_loss(model, training_set, ip.get_misc_params(), hard_enforce_IC=hard_enforce_IC).shape)
print(ip.enforced_model(training_set, model).shape)
print(model)

SE_128wide_nodiff_Nesterov_LBFGS
torch.Size([1920, 2])
torch.Size([2240, 2])
torch.Size([1920, 2])
IcePINN(
  (sml): SinusoidalMappingLayer()
  (post_sml): Linear(in_features=384, out_features=128, bias=True)
  (sin): SinActivation()
  (fc_in): Linear(in_features=2, out_features=128, bias=True)
  (post_fc_in): Linear(in_features=128, out_features=128, bias=True)
  (fc_hidden): ModuleList(
    (0-5): 6 x Linear(in_features=128, out_features=128, bias=True)
  )
  (fc_out): Linear(in_features=128, out_features=2, bias=True)
)


  return torch.tensor(NBAR - NSTAR*torch.sin(2*np.pi*Ntot)).to(device)


Train model

In [6]:
if curriculum_learning:
    # First, train without IC enforced (if it wasn't pre-loaded)
    if not preload:
        optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
        ip.train_IcePINN(
            model=model, 
            optimizer=optimizer, 
            training_set=training_set, 
            epochs=100_000, 
            name=MODEL_NAME, 
            print_every=1_000,
            diffusion=diffusion,
            LR_scheduler=None,
            enforce_IC=False)
    else:
        model = ip.load_IcePINN(MODEL_NAME, pre_IC=True)
        model.train()


In [7]:
#optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3, momentum=0.9, nesterov=True)
# SGD Nesterov has exploding gradients with LR >=1e-5

if curriculum_learning:
    # Gradually enforce IC over adjustment_period and keep it enforced
    ip.train_IcePINN(
        model=model, 
        optimizer=optimizer, 
        training_set=training_set, 
        epochs=200_000, 
        name=MODEL_NAME, 
        print_every=1_000,
        diffusion=diffusion,
        LR_scheduler=None,
        enforce_IC=True,
        adjustment_period=100_000)

else:
    if not preload:
        # Train normally with IC enforced
        ip.train_IcePINN(
            model=model, 
            optimizer=optimizer, 
            training_set=training_set, 
            epochs=100_000, 
            name=MODEL_NAME, 
            print_every=1_000,
            diffusion=diffusion,
            LR_scheduler=None,
            enforce_IC=True,
            hard_enforce_IC=hard_enforce_IC,
            adjustment_period=0)

Commencing PINN training on 1920 points for 100000 epochs.
IC is enforced with an adjustment period of 0.


  return Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass


Epoch [1k/100k] at 0m 13s: Ntot = 454.456, Nqll = 109.160, LR = 0.001
Epoch [2k/100k] at 0m 25s: Ntot = 481.400, Nqll = 117.909, LR = 0.001
Epoch [3k/100k] at 0m 37s: Ntot = 517.216, Nqll = 126.883, LR = 0.001
Epoch [4k/100k] at 0m 49s: Ntot = 542.154, Nqll = 131.055, LR = 0.001
Epoch [5k/100k] at 1m 2s: Ntot = 562.760, Nqll = 133.559, LR = 0.001
Epoch [6k/100k] at 1m 14s: Ntot = 601.441, Nqll = 141.002, LR = 0.001
Epoch [7k/100k] at 1m 26s: Ntot = 657.666, Nqll = 153.327, LR = 0.001
Epoch [8k/100k] at 1m 38s: Ntot = 735.410, Nqll = 171.870, LR = 0.001
Epoch [9k/100k] at 1m 50s: Ntot = 834.408, Nqll = 195.618, LR = 0.001
Epoch [10k/100k] at 2m 3s: Ntot = 940.872, Nqll = 219.873, LR = 0.001
Training 1/10ths complete! Completion estimate: 20m 30s | 18m 27s remaining.
Best model saved so far: Epoch 164; Loss: 317.091 Ntot, 89.133 Nqll
Epoch [11k/100k] at 2m 15s: Ntot = 1025.866, Nqll = 238.694, LR = 0.001
Epoch [12k/100k] at 2m 28s: Ntot = 1025.369, Nqll = 238.647, LR = 0.001
Epoch [13k/1

In [8]:
# Load best model for future training (LBFGS)
model = ip.load_IcePINN(MODEL_NAME)
model.train()

IcePINN(
  (sml): SinusoidalMappingLayer()
  (post_sml): Linear(in_features=384, out_features=128, bias=True)
  (sin): SinActivation()
  (fc_in): Linear(in_features=2, out_features=128, bias=True)
  (post_fc_in): Linear(in_features=128, out_features=128, bias=True)
  (fc_hidden): ModuleList(
    (0-5): 6 x Linear(in_features=128, out_features=128, bias=True)
  )
  (fc_out): Linear(in_features=128, out_features=2, bias=True)
)

Prepare for L-BFGS optimization

In [9]:
lbfgs_optim = torch.optim.LBFGS(
    params=model.parameters(), 
    lr=0.1,
    max_iter=5_000, 
    history_size=5_000
)
misc_params = ip.get_misc_params()
lbfgs_iter_counter = 0
lbfgs_print_freq = 1

# closure() is called by L-BFGS when you call step() up to max_iter times
def closure():
    #lbfgs_iter_counter += 1

    lbfgs_optim.zero_grad()
    loss = ip.calc_cp_loss(model, training_set, misc_params, diffusion, hard_enforce_IC=hard_enforce_IC)
    loss.backward(torch.ones_like(loss))

    # Gradient clipping to mitigate exploding gradients
    nn.utils.clip_grad_norm_(model.parameters(), max_norm=0.5, norm_type=2)
    #nn.utils.clip_grad_value_(model.parameters(), clip_value=1.0)

    #if lbfgs_iter_counter % lbfgs_print_freq == 0:
    # sum and print loss
    Ntot_loss = torch.sum(loss[:, 0]).item()
    Nqll_loss = torch.sum(loss[:, 1]).item()
    print(f"L-BFGS iteration loss: Ntot = {Ntot_loss:.3f}, Nqll = {Nqll_loss:.3f}")
    # {lbfgs_iter_counter}
    # Return as a summed scalar loss: required by L-BFGS
    return torch.add(Ntot_loss, Nqll_loss)

L-BFGS time

In [10]:
if LBFGS:    
    # lbfgs_iter_counter = 0
    # lbfgs_print_freq = 1

    lbfgs_optim.step(closure)

    
    save_path = './models/'+MODEL_NAME

    # Create folder to store model in if necessary
    if not os.path.exists(save_path):
        os.makedirs(save_path)
    
    # Save model (not including initial condition wrapper)
    # torch.save(model.state_dict(), save_path+'/post_LBFGS_params.pth')
    # TODO - update (load) so that it can load LBFGS results
    print("Done!")


L-BFGS iteration loss: Ntot = 317.091, Nqll = 89.133
L-BFGS iteration loss: Ntot = 316.423, Nqll = 88.339
L-BFGS iteration loss: Ntot = 351.050, Nqll = 293.601
L-BFGS iteration loss: Ntot = 347.180, Nqll = 273.325
L-BFGS iteration loss: Ntot = 86892.196, Nqll = 395491.398
L-BFGS iteration loss: Ntot = 696502.010, Nqll = 3247856.370
L-BFGS iteration loss: Ntot = 618671.796, Nqll = 2888325.903
L-BFGS iteration loss: Ntot = 538281.713, Nqll = 2514530.020
L-BFGS iteration loss: Ntot = 288290.665, Nqll = 1337967.064
L-BFGS iteration loss: Ntot = 170248.123, Nqll = 797351.640
L-BFGS iteration loss: Ntot = 9124.398, Nqll = 40669.927
L-BFGS iteration loss: Ntot = 3442.931, Nqll = 14314.331
L-BFGS iteration loss: Ntot = 3364.412, Nqll = 14009.489
L-BFGS iteration loss: Ntot = 567364.331, Nqll = 2677201.801
L-BFGS iteration loss: Ntot = 445289.066, Nqll = 2064409.547
L-BFGS iteration loss: Ntot = 314427.312, Nqll = 1450019.925
L-BFGS iteration loss: Ntot = 190907.483, Nqll = 1027686.378
L-BFGS i

KeyboardInterrupt: 

In [11]:
print(training_set[0:20])

tensor([[-30.0000,   0.0000],
        [-29.8119,   0.0000],
        [-29.6238,   0.0000],
        [-29.4357,   0.0000],
        [-29.2476,   0.0000],
        [-29.0596,   0.0000],
        [-28.8715,   0.0000],
        [-28.6834,   0.0000],
        [-28.4953,   0.0000],
        [-28.3072,   0.0000],
        [-28.1191,   0.0000],
        [-27.9310,   0.0000],
        [-27.7429,   0.0000],
        [-27.5549,   0.0000],
        [-27.3668,   0.0000],
        [-27.1787,   0.0000],
        [-26.9906,   0.0000],
        [-26.8025,   0.0000],
        [-26.6144,   0.0000],
        [-26.4263,   0.0000]], device='cuda:0', grad_fn=<SliceBackward0>)


Train some more if needed?

In [None]:
# ip.train_IcePINN(
#     model=model, 
#     optimizer=optimizer, 
#     training_set=training_set, 
#     epochs=100_000, 
#     name=MODEL_NAME, 
#     print_every=1_000, 
#     diffusion=diffusion,
#     LR_scheduler=None,
#     enforce_IC=True,
#     adjustment_period=0)