In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as tgrad
import numpy as np
import os
import time

import utils

import networks

import matplotlib.pyplot as plt

import importlib

In [2]:
# Allow duplicate OpenMP runtimes to coexist in this process.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

# Pick the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.is_available())
# torch.set_default_tensor_type(torch.DoubleTensor)
print(device)

# Compare on `device.type`: a torch.device never compares equal to a plain
# string, so `device == 'cuda'` is always False and the GPU name was never shown.
if device.type == 'cuda':
    print(torch.cuda.get_device_name())

True
cuda


# Data Sampling
In our case, the system is the European call option PDE, and the physical information about the system consists of the boundary value conditions, the final value condition, and the PDE itself.

In [3]:
# Problem parameters. K enters the payoff below; r and sigma are the
# coefficients of the PDE residual formed in the training loop.
K, r, sigma = 40, 0.05, 0.25

# Time horizon and the sampling intervals for S and t.
T = 1
S_range = [0, 130]
t_range = [0, T]


def gs(x):
    """Return the element-wise payoff max(x - K, 0) for a scalar or array x."""
    return np.fmax(x - K, 0)

# Build Neural Network

In [4]:
# Feed-forward network; the printed module summary below shows 2 input
# features, three 50-unit Linear layers and a single output.
fnn = networks.FeedforwardNeuralNetwork(2, 50, 1, 3)

# Move the parameters to the device selected earlier instead of hard-coding
# CUDA, so the notebook also runs on CPU-only machines. `.to()` returns the
# module, so the cell still displays the network summary.
fnn.to(device)

FeedforwardNeuralNetwork(
  (layers): ModuleList(
    (0): Linear(in_features=2, out_features=50, bias=True)
    (1-2): 2 x Linear(in_features=50, out_features=50, bias=True)
  )
  (output): Linear(in_features=50, out_features=1, bias=True)
  (relu): ReLU()
)

In [5]:
import torchimize
from torchimize.functions import lsq_lma

# Reference only (not executed): torchimize's Levenberg-Marquardt solvers.
# `lsq_lma` is imported above but the training below uses Adam.
#
#   from torchimize.functions import lsq_lma_parallel
#   coeffs_list = lsq_lma_parallel(
#       p = initials_batch,
#       function = multi_cost_fun_batch,
#       jac_function = multi_jac_fun_batch,
#       args = (other_args,),
#       wvec = torch.ones(5, device='cuda', dtype=initials_batch.dtype),
#       ftol = 1e-8,
#       ptol = 1e-8,
#       gtol = 1e-8,
#       meth = 'marq',
#       max_iter = 40,
#   )
#
#   coeffs_list = lsq_lma(fnn.parameters(), function=lossFunction)

# Number of optimisation steps and the number of points requested per
# condition ("pde", "bc", "fc") on every step.
n_epochs = 60000
samples = {"pde": 50000, "bc": 5000, "fc": 5000}

# Criterion and optimiser used by the training loop.
lossFunction = nn.MSELoss()
optimizer = optim.Adam(fnn.parameters(), lr=2e-5)

# Modelling

- For each iteration in the training loop, we are sampling data for the three physical conditions of the PDE.
- Then we are calculating the loss three times on the same model, accumulating them into a combined objective function to be minimised for the Neural Network.
- The first loss is the differential equation loss. Here we are trying to minimise the PDE by calculating gradients and forming the PDE itself.
- The remaining losses are calculated for the boundary value and final value conditions of the PDE.
- The mean squared error loss function `nn.MSELoss()` is chosen as the criterion to be minimised.
- The Adam optimizer `optim.Adam(lr=2e-5)`, with a learning rate of 0.00002, is chosen for performing the weight updates.

In [6]:
# Physics-informed training loop.
# Every epoch draws training points, forms the PDE residual loss with autograd
# derivatives of the network output, adds the data loss on the condition
# points, and takes one optimiser step on the sum.
loss_hist = []  # combined loss per epoch
start_time = time.time()

for epoch in range(n_epochs):

    # NOTE(review): RNG_key is the same constant on every epoch. If
    # utils.trainingData seeds its sampler from it, each epoch trains on
    # identical points and this call could be hoisted out of the loop —
    # confirm against utils.
    bc_st_train, bc_v_train, n_st_train, n_v_train = utils.trainingData(
        K,
        r,
        sigma,
        T,
        S_range[-1],
        S_range,
        t_range,
        gs,
        samples['bc'],
        samples['fc'],
        samples['pde'],
        RNG_key=123,
    )

    # Convert to float32 tensors on the target device. requires_grad_() is
    # applied after the transfer so the tensor we differentiate with respect to
    # is the leaf that actually lives on `device`.
    n_st_train = torch.from_numpy(n_st_train).float().to(device).requires_grad_()
    # n_v_train is not used by the losses below; kept for parity with the
    # sampler's return values.
    n_v_train = torch.from_numpy(n_v_train).float().to(device)

    bc_st_train = torch.from_numpy(bc_st_train).float().to(device)
    bc_v_train = torch.from_numpy(bc_v_train).float().to(device)

    # --- PDE round ---------------------------------------------------------
    y1_hat = fnn(n_st_train)
    # First derivatives of the output w.r.t. both input columns.
    # create_graph=True keeps the graph so these derivatives can themselves be
    # differentiated and back-propagated; torch.ones_like follows the device
    # and dtype of the output instead of assuming CUDA.
    grads = tgrad.grad(
        y1_hat,
        n_st_train,
        grad_outputs=torch.ones_like(y1_hat),
        create_graph=True,
    )[0]
    # Column 0 is treated as t and column 1 as S.
    dVdt, dVdS = grads[:, 0].view(-1, 1), grads[:, 1].view(-1, 1)
    grads2nd = tgrad.grad(
        dVdS,
        n_st_train,
        grad_outputs=torch.ones_like(dVdS),
        create_graph=True,
    )[0]
    d2VdS2 = grads2nd[:, 1].view(-1, 1)
    S1 = n_st_train[:, 1].view(-1, 1)
    # Residual of  -dV/dt = 0.5*sigma^2*S^2*d2V/dS2 + r*S*dV/dS - r*V.
    pde_loss = lossFunction(-dVdt, 0.5*((sigma*S1)**2)*d2VdS2 + r*S1*dVdS - r*y1_hat)

    # --- Conditions round --------------------------------------------------
    y21_hat = fnn(bc_st_train)
    # NOTE(review): assumes bc_v_train has the same shape as the network
    # output; a 1-D target would silently broadcast inside MSELoss — confirm
    # against utils.trainingData.
    bc_loss = lossFunction(y21_hat, bc_v_train)

    # --- Backpropagation and update ----------------------------------------
    optimizer.zero_grad()
    # nn.MSELoss() already reduces to a scalar, so the terms are summed directly.
    combined_loss = pde_loss + bc_loss
    combined_loss.backward()
    optimizer.step()

    loss_hist.append(combined_loss.item())
    if epoch % 500 == 0:
        print(f'{epoch}/{n_epochs} PDE Loss: {pde_loss.item():.5f}, BC Loss: {bc_loss.item():.5f}, total loss: {combined_loss.item():.5f}, minimum loss: {min(loss_hist):.5f}')

end_time = time.time()
print('run time:', end_time - start_time)

0/60000 PDE Loss: 0.00019, BC Loss: 3671.54297, total loss: 3671.543213, minimum loss: 3671.54321
500/60000 PDE Loss: 0.00040, BC Loss: 2511.03955, total loss: 2511.040039, minimum loss: 2511.04004
1000/60000 PDE Loss: 0.00317, BC Loss: 1207.01245, total loss: 1207.015625, minimum loss: 1207.01562
1500/60000 PDE Loss: 0.00633, BC Loss: 244.75986, total loss: 244.766190, minimum loss: 244.76619
2000/60000 PDE Loss: 0.00430, BC Loss: 79.33062, total loss: 79.334923, minimum loss: 79.33492
2500/60000 PDE Loss: 0.03467, BC Loss: 73.96288, total loss: 73.997551, minimum loss: 73.99755
3000/60000 PDE Loss: 0.12812, BC Loss: 71.25127, total loss: 71.379395, minimum loss: 71.37939
3500/60000 PDE Loss: 0.32444, BC Loss: 68.03030, total loss: 68.354744, minimum loss: 68.35474
4000/60000 PDE Loss: 0.68642, BC Loss: 64.16986, total loss: 64.856285, minimum loss: 64.85629
4500/60000 PDE Loss: 1.49864, BC Loss: 58.49059, total loss: 59.989227, minimum loss: 59.98923
5000/60000 PDE Loss: 3.01815, BC 