In [1]:
import argparse
import datetime
import logging
import os
import sys
from timeit import default_timer as timer

import matplotlib.pyplot as plt
import numpy as np
import tqdm

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt


In [2]:
os.environ['KMP_DUPLICATE_LIB_OK']='True'
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.is_available())
# torch.set_default_tensor_type(torch.DoubleTensor)
print(device)

if device == 'cuda': 
    print(torch.cuda.get_device_name())

True
cuda


# Network

In [3]:
class AdaptiveLinear(nn.Linear):
    r"""Applies a linear transformation to the input data as follows
    :math:`y = naxA^T + b`.
    More details available in Jagtap, A. D. et al. Locally adaptive
    activation functions with slope recovery for deep and
    physics-informed neural networks, Proc. R. Soc. 2020.

    Parameters
    ----------
    in_features : int
        The size of each input sample
    out_features : int 
        The size of each output sample
    bias : bool, optional
        If set to ``False``, the layer will not learn an additive bias
    adaptive_rate : float, optional
        Scalable adaptive rate parameter for activation function that
        is added layer-wise for each neuron separately. It is treated
        as learnable parameter and will be optimized using a optimizer
        of choice 
        (self.A is the learnable parameter which is initialized by the 
        self.adaptive rate. To create a learnable parameter for each neuron, 
        it multiplies the self.adaptive rate to the number of input features.)
    adaptive_rate_scaler : float, optional
        Fixed, pre-defined, scaling factor for adaptive activation
        functions
    """
    def __init__(self, in_features, out_features, bias=True, adaptive_rate=None, adaptive_rate_scaler=None):
        super(AdaptiveLinear, self).__init__(in_features, out_features, bias)
        self.adaptive_rate = adaptive_rate
        self.adaptive_rate_scaler = adaptive_rate_scaler
        if self.adaptive_rate:
            self.A = nn.Parameter(self.adaptive_rate * torch.ones(self.in_features))
            if not self.adaptive_rate_scaler:
                self.adaptive_rate_scaler = 10.0
            
    def forward(self, input):
        if self.adaptive_rate:
            return nn.functional.linear(self.adaptive_rate_scaler * self.A * input, self.weight, self.bias)
        return nn.functional.linear(input, self.weight, self.bias)

    def extra_repr(self):
        return (
            f'in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}, '
            f'adaptive_rate={self.adaptive_rate is not None}, adaptive_rate_scaler={self.adaptive_rate_scaler is not None}'
        )



class Net(nn.Module):
    r"""Neural approximator for the unknown function that is supposed
    to be solved.

    More details available in Raissi, M. et al. Physics-informed neural
    networks: A deep learning framework for solving forward and inverse
    problems involving nonlinear partial differential equations, J.
    Comput. Phys. 2019.

    Parameters
    ----------
    sizes : list
        Each element represents the number of neuron per layer
    activation : callable 
        Activation function
    dropout_rate : float, optional
        Dropout rate for regulrization during training process and
        uncertainty quantification by means of Monte Carlo dropout
        procedure while performing evaluation
    adaptive_rate : float, optional
        Scalable adaptive rate parameter for activation function that
        is added layer-wise for each neuron separately. It is treated
        as learnable parameter and will be optimized using a optimizer
        of choice
    adaptive_rate_scaler : float, optional
        Fixed, pre-defined, scaling factor for adaptive activation
        functions
    """
    def __init__(self, sizes, activation, dropout_rate=0.0, adaptive_rate=None, adaptive_rate_scaler=None):
        super(Net, self).__init__()
        self.regressor = nn.Sequential(
            *[Net.linear_block(in_features, out_features, activation, dropout_rate, adaptive_rate, adaptive_rate_scaler)
            for in_features, out_features in zip(sizes[:-1], sizes[1:-1])],     
            AdaptiveLinear(sizes[-2], sizes[-1]) # output layer is regular linear transformation
            )
        
    def forward(self, x):
        return self.regressor(x)

    @staticmethod
    def linear_block(in_features, out_features, activation, dropout_rate, adaptive_rate, adaptive_rate_scaler):
        activation_dispatcher = nn.ModuleDict([
            ['lrelu', nn.LeakyReLU()],
            ['relu', nn.ReLU()],
            ['tanh', nn.Tanh()],
            ['sigmoid', nn.Sigmoid()],
            # ['swish', Swish()]
        ])
        return nn.Sequential(
            AdaptiveLinear(in_features, out_features, adaptive_rate=adaptive_rate, adaptive_rate_scaler=adaptive_rate_scaler),
            activation_dispatcher[activation],
            nn.Dropout(dropout_rate),
            )

In [4]:
IPINN = Net(
    sizes=[2, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 1], activation='relu', dropout_rate=0, adaptive_rate=0.1, adaptive_rate_scaler=10.0
    )
IPINN.cuda()
print('done')

done


# Train

In [5]:
import utils
K = 10
r = 0.035
sigma = 0.2
T = 1
S_range = [0, int(5*K)]
t_range = [0, T]
gs = lambda x: np.fmax(x-K, 0)
M = 100
N = 5000

# physical constraints and sampling
samples = {"pde": 5000, "bc":500, "fc":500}

# sample data generated by finite difference method
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = utils.fdm_data(S_range[-1], T, M, N, "500000sample.csv", device)

In [6]:
n_epochs = 100000
adaptive_rate = 0.1
lossFunction = nn.MSELoss()
lr = 0.0001
optimizer = optim.Adam(IPINN.parameters(), lr=lr)

In [7]:
import time
import torch.autograd as tgrad

loss_hist = []
relative_l2_hist = []
min_train_loss = float("inf")  # Initialize with a large value
final_model = None
start_time = time.time()

# tqdm.tqdm(range(n_epochs), desc='[Training procedure]', ascii=True, total=n_epochs)

for _ in range(n_epochs):

    bc_st_train, bc_v_train, n_st_train, n_v_train = \
    utils.trainingData(K, r, sigma, T, S_range[-1], S_range, t_range, gs, 
                       samples['bc'], 
                       samples['fc'], 
                       samples['pde'], 
                       RNG_key=123)
    
    # save training data points to tensor and send to device
    n_st_train = torch.from_numpy(n_st_train).float().requires_grad_().to(device)
    n_v_train = torch.from_numpy(n_v_train).float().to(device)
    
    bc_st_train = torch.from_numpy(bc_st_train).float().to(device)
    bc_v_train = torch.from_numpy(bc_v_train).float().to(device)   
    
    # pde residual loss
    y1_hat = IPINN(n_st_train)
    grads = tgrad.grad(y1_hat, n_st_train, grad_outputs=torch.ones(y1_hat.shape).cuda(), 
                retain_graph=True, create_graph=True, only_inputs=True)[0]
    dVdt, dVdS = grads[:, 0].view(-1, 1), grads[:, 1].view(-1, 1)
    grads2nd = tgrad.grad(dVdS, n_st_train, grad_outputs=torch.ones(dVdS.shape).cuda(), 
                    create_graph=True, only_inputs=True, allow_unused=True)[0]
    S1 = n_st_train[:, 1].view(-1, 1)
    d2VdS2 = grads2nd[:, 1].view(-1, 1)
    pde_loss = lossFunction(-dVdt, 0.5*((sigma*S1)**2)*d2VdS2 + r*S1*dVdS - r*y1_hat)
    
    # boudary condition loss
    y2_hat = IPINN(bc_st_train)
    bc_loss = lossFunction(bc_v_train, y2_hat)
    
    # sample training data loss
    y3_hat = IPINN(X_train_tensor)
    data_loss = lossFunction(y_train_tensor, y3_hat)
    
    # relative l2 error
    y4_hat = IPINN(X_test_tensor)
    relative_l2 = torch.sqrt(torch.sum((y_test_tensor - y4_hat)**2))/torch.sqrt(torch.sum(y_test_tensor**2))
    
    # total loss
    local_recovery_terms = torch.tensor([torch.mean(IPINN.regressor[layer][0].A.data) for layer in range(len(IPINN.regressor) - 1)])
    slope_recovery_term = 1 / torch.mean(torch.exp(local_recovery_terms))
    loss = 2 *pde_loss + 1*bc_loss + slope_recovery_term + 1 * data_loss
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    
    mse_loss = pde_loss + bc_loss + data_loss
    loss_hist.append(mse_loss.item())
    relative_l2_hist.append(relative_l2.item())
    if _ % 500 == 0:
        print(f'{_}/{n_epochs} PDE Loss: {pde_loss.item():.5f}, BC Loss: {bc_loss.item():.5f}, data loss:{data_loss.item():.5f}, reg_loss: {slope_recovery_term.item(): 5f}, total loss: {mse_loss.item():5f}, minimum loss: {min(loss_hist):.5f}')

    if mse_loss.item() < min_train_loss:
        min_train_loss = mse_loss.item()
        final_model = IPINN.state_dict()
    pass
    
elapsed = timer() - start_time
end_time = time.time()
print('run time:', end_time - start_time)
logging.info(f'Training finished. Elapsed time: {elapsed} s\n')

0/100000 PDE Loss: 0.00001, BC Loss: 677.86414, data loss:426.87576, reg_loss:  0.904837, total loss: 1104.739868, minimum loss: 1104.73987
500/100000 PDE Loss: 0.02417, BC Loss: 0.00356, data loss:0.00392, reg_loss:  0.891277, total loss: 0.031649, minimum loss: 0.03095
1000/100000 PDE Loss: 0.00219, BC Loss: 0.00159, data loss:0.00263, reg_loss:  0.891142, total loss: 0.006399, minimum loss: 0.00468
1500/100000 PDE Loss: 0.00054, BC Loss: 0.00600, data loss:0.00681, reg_loss:  0.890797, total loss: 0.013348, minimum loss: 0.00308
2000/100000 PDE Loss: 0.00063, BC Loss: 0.00780, data loss:0.01031, reg_loss:  0.890558, total loss: 0.018739, minimum loss: 0.00282
2500/100000 PDE Loss: 0.00022, BC Loss: 0.20705, data loss:0.52658, reg_loss:  0.890350, total loss: 0.733853, minimum loss: 0.00270
3000/100000 PDE Loss: 0.00008, BC Loss: 0.00090, data loss:0.00151, reg_loss:  0.890376, total loss: 0.002495, minimum loss: 0.00249
3500/100000 PDE Loss: 0.00016, BC Loss: 0.00123, data loss:0.00

In [None]:
import pandas as pd
saved_loss = pd.DataFrame({'training loss': loss_hist, 'relative l2': relative_l2_hist})
saved_loss.to_csv('ipinn_loss_hist.csv')
fig = plt.figure(figsize=(12,12))
plt.plot(range(n_epochs), relative_l2_hist)
plt.xlabel('epochs')
plt.ylabel('loss')
plt.title('IPINN')