In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as tgrad
import torch.nn.functional as F

import os
import tqdm
import time
import logging
import datetime
import numpy as np
import matplotlib.pyplot as plt

import importlib
import utils

from timeit import default_timer as timer

In [None]:
# Allow more than one OpenMP runtime to be loaded into the process; without
# this flag some numpy/torch/matplotlib installations abort on import.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

# Use the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.is_available())
# torch.set_default_tensor_type(torch.DoubleTensor)
print(device)

# Compare the device *type*: a torch.device never compares equal to a plain
# string, so `device == 'cuda'` was always False and the name was never shown.
if device.type == 'cuda':
    print(torch.cuda.get_device_name())

# The Black Scholes Formula
The Black–Scholes formula calculates the price of European put and call options. This price is consistent with the Black–Scholes equation. This follows since the formula can be obtained by solving the equation for the corresponding terminal and boundary conditions:
$$
{\begin{aligned}&C(0,t)=0{\text{ for all }}t\\&C(S,t)\rightarrow S-K{\text{ as }}S\rightarrow \infty \\&C(S,T)=\max\{S-K,0\}\end{aligned}}
$$

The value of a call option for a non-dividend-paying underlying stock in terms of the Black–Scholes parameters is:

$$
    {\begin{aligned}C(S_{t},t)&=N(d_{+})S_{t}-N(d_{-})Ke^{-r(T-t)}\\d_{+}&={\frac {1}{\sigma {\sqrt {T-t}}}}\left[\ln \left({\frac {S_{t}}{K}}\right)+\left(r+{\frac {\sigma ^{2}}{2}}\right)(T-t)\right]\\d_{-}&=d_{+}-\sigma {\sqrt {T-t}}\\\end{aligned}}
    $$

The price of a corresponding put option based on put–call parity with discount factor $e^{{-r(T-t)}}$ is:
$$
    {\begin{aligned}P(S_{t},t)&=Ke^{-r(T-t)}-S_{t}+C(S_{t},t)\\&=N(-d_{-})Ke^{-r(T-t)}-N(-d_{+})S_{t}\end{aligned}}\,
    $$

In [None]:
# Closed-form Black–Scholes price of a European call.
# Inputs that the Greeks are taken with respect to are created with
# requires_grad=True so they can be differentiated through autograd later.
S = torch.tensor([80.0], requires_grad=True)     # spot price
t = torch.tensor([0.0], requires_grad=True)      # current time
sigma = torch.tensor([0.3], requires_grad=True)  # volatility
r = torch.tensor([0.05], requires_grad=True)     # risk-free rate
K = torch.tensor([70.0])                         # strike
T = torch.tensor([1.0])                          # maturity

t2m = T - t  # time to maturity


def N0(value):
    """Standard normal CDF expressed through the error function."""
    return 0.5 * (1 + torch.erf((value / 2**0.5)))


vol_sqrt_t2m = sigma * torch.sqrt(t2m)
d1 = (torch.log(S / K) + (r + 0.5 * sigma**2) * t2m) / vol_sqrt_t2m
d2 = d1 - vol_sqrt_t2m
Nd1, Nd2 = N0(d1), N0(d2)

discount = torch.exp(-r * t2m)
C = S * Nd1 - K * Nd2 * discount
print("Option Price:", C.item()) #17.01496

# The Black Scholes Equation
The partial derivatives in the equation below are computed with PyTorch's automatic differentiation (`torch.autograd`).

$$
{\frac {\partial V}{\partial t}}+{\frac {1}{2}}\sigma ^{2}S^{2}{\frac {\partial ^{2}V}{\partial S^{2}}}+rS{\frac {\partial V}{\partial S}}-rV=0
$$

To check that the calculation is correct, the Greeks obtained by automatic differentiation are compared with the known values implied by the Black–Scholes formula.

In [None]:
def _derivative(output, wrt):
    """Differentiate `output` w.r.t. `wrt`, keeping the graph for higher orders."""
    (result,) = tgrad.grad(output, wrt, grad_outputs=torch.ones(output.shape),
                           create_graph=True, only_inputs=True)
    return result


# First-order sensitivities of the call price, plus the second derivative in S.
dCdt = _derivative(C, t)
dCdS = _derivative(C, S)
d2CdS2 = _derivative(dCdS, S)
dCdvol = _derivative(C, sigma)
dCdr = _derivative(C, r)

# Theta is reported with the sign flipped relative to dC/dt.
theta, delta, gamma, vega, rho = -dCdt[0], dCdS[0], d2CdS2[0], dCdvol[0], dCdr[0]

print(*(f'{og.item():.4f}' for og in (theta, delta, gamma, vega, rho)), sep='\n')

# Expected output:
#   Theta 5.8385
#   Delta 0.7769
#   Gamma 0.0124
#   Vega 23.8776
#   Rho 45.1372

# Black–Scholes PDE residual check (left disabled, as in the original cell):
# print((-theta + 0.5*sigma**2 * S**2*gamma + r*S*delta - r*C).item())

# Data Sampling
In our case the system is the European call option PDE, and the physical information about it consists of the boundary conditions, the final (terminal) condition, and the PDE itself.

In [None]:
# Contract / market parameters used for the PINN experiment.
# These rebind K, r, sigma and T from the tensors of the closed-form example
# above to plain Python numbers.
K, r, sigma, T = 40, 0.05, 0.25, 1

# Sampling domain: underlying price S and time t.
S_range = [0, 130]
t_range = [0, T]


def gs(x):
    """Call payoff max(x - K, 0), applied element-wise; K is read at call time."""
    return np.fmax(x - K, 0)

In [None]:
# Number of points to draw for each part of the problem:
# PDE collocation points, boundary-condition points, final-condition points.
samples = dict(pde=5000, bc=500, fc=500)

# Draw one fixed sample set (used for the scatter plot below).
# The upper end of S_range is passed as the fifth argument (named `Smax`
# in the data_sampling wrapper defined later in this notebook).
bc_st_train, bc_v_train, n_st_train, n_v_train = utils.trainingData(
    K, r, sigma, T,
    S_range[-1], S_range, t_range, gs,
    samples['bc'], samples['fc'], samples['pde'],
    RNG_key=123,
)

In [None]:
# Scatter of the sampled (t, S) points: column 0 is time and column 1 is the
# underlying price (the same column convention loss_fn uses for dV/dt, dV/dS).
fig = plt.figure(figsize=(9,6))
plt.scatter(n_st_train[:, 0], n_st_train[:, 1], marker='.', alpha=0.3)
plt.scatter(bc_st_train[:, 0], bc_st_train[:, 1], marker='X')
plt.xlabel('Time t')
# The vertical axis is the underlying price S, not the option price.
plt.ylabel('Stock price S')

plt.title('Positions of collocation points and boundary data');

# Build Neural Network

In [None]:
# Training configuration shared by every network in the comparison.
n_epochs = 30_000            # number of optimisation steps
sizes = [2, 50, 50, 50, 1]   # passed to utils.network_dispatcher (presumably layer widths)
lr = 3e-5                    # learning rate handed to utils.optimizer_dispatcher
w1, w2 = 3, 0.2              # fixed weights of the PDE loss / boundary loss

# Mean-squared error is used for both the PDE residual and the boundary data.
lossFunction = nn.MSELoss()

In [None]:
# PINN models: three independently initialised networks per architecture
# (plain / weighted / adaptive-weight variants are distinguished only by how
# their loss is assembled during training).
_PINN_ARGS = ('pinn', sizes, 'relu', 0, None, 10.0)
_IPINN_ARGS = ('ipinn', sizes, 'relu', 0.0, 0.1, 10.0)

pinn, wpinn, awpinn = (
    utils.network_dispatcher(*_PINN_ARGS).to(device=device) for _ in range(3)
)

ipinn, wipinn, awipinn = (
    utils.network_dispatcher(*_IPINN_ARGS).to(device=device) for _ in range(3)
)

In [None]:
def data_sampling(K, r, sigma, T, Smax, S_range, t_range, gs, num_bc, num_fc, num_nc, RNG_key=123):
    """Sample training points and return them as float32 tensors on `device`.

    All arguments are forwarded unchanged to ``utils.trainingData``.

    Returns:
        (n_st_train, n_v_train, bc_st_train, bc_v_train). Note that the
        collocation tensors come first here, whereas ``utils.trainingData``
        returns the boundary arrays first. ``n_st_train`` has
        ``requires_grad=True`` so the network output can be differentiated
        with respect to its inputs.
    """
    # sampling
    bc_st_train, bc_v_train, n_st_train, n_v_train = \
        utils.trainingData(K, r, sigma, T, Smax, S_range, t_range, gs, num_bc, num_fc, num_nc, RNG_key)

    # Move to the target device first and only then enable gradients, so the
    # returned tensor is itself the autograd leaf (calling requires_grad_()
    # before .to(device) makes the CUDA tensor a non-leaf copy).
    n_st_train = torch.from_numpy(n_st_train).float().to(device).requires_grad_()
    n_v_train = torch.from_numpy(n_v_train).float().to(device)

    bc_st_train = torch.from_numpy(bc_st_train).float().to(device)
    bc_v_train = torch.from_numpy(bc_v_train).float().to(device)

    return n_st_train, n_v_train, bc_st_train, bc_v_train

In [None]:
def loss_fn(model, bc_st_train, bc_v_train, n_st_train, n_v_train, lossFunction):
    """Compute the PDE-residual loss and the boundary-data loss for `model`.

    Args:
        model: callable mapping an (N, 2) tensor to an (N, 1) prediction.
        bc_st_train: inputs of the boundary/final-condition points.
        bc_v_train: target values at `bc_st_train`.
        n_st_train: collocation points with ``requires_grad=True``; column 0
            is differentiated as time t and column 1 as the price S.
        n_v_train: unused; kept so existing callers keep working.
        lossFunction: criterion applied to both terms (e.g. ``nn.MSELoss()``).

    Returns:
        (pde_loss, bc_loss) as scalar tensors.

    The Black–Scholes coefficients ``sigma`` and ``r`` are read from the
    notebook's global scope.
    """
    # pde loss
    y_hat = model(n_st_train)

    # ones_like keeps grad_outputs on the same device/dtype as the prediction;
    # the previous hard-coded .cuda() crashed on CPU-only machines.
    grads = tgrad.grad(y_hat, n_st_train, grad_outputs=torch.ones_like(y_hat),
                       create_graph=True)[0]
    dVdt, dVdS = grads[:, 0].view(-1, 1), grads[:, 1].view(-1, 1)
    grads2nd = tgrad.grad(dVdS, n_st_train, grad_outputs=torch.ones_like(dVdS),
                          create_graph=True, allow_unused=True)[0]
    S1 = n_st_train[:, 1].view(-1, 1)
    d2VdS2 = grads2nd[:, 1].view(-1, 1)
    # Black–Scholes PDE rearranged as  -V_t = 0.5*sigma^2*S^2*V_SS + r*S*V_S - r*V
    pde_loss = lossFunction(-dVdt, 0.5*((sigma*S1)**2)*d2VdS2 + r*S1*dVdS - r*y_hat)

    # boundary condition loss
    y2_hat = model(bc_st_train)
    bc_loss = lossFunction(bc_v_train, y2_hat)

    return pde_loss, bc_loss

In [None]:
def loss_dispatcher(pde_loss, bc_loss, adaptive_rate, model, w1, w2, adaptive_weight, x_f_s, x_label_s):
    '''
    Combine the PDE loss and the boundary loss into the training objective.

    @param pde_loss: scalar tensor, PDE residual loss
    @param bc_loss: scalar tensor, boundary/final-condition loss
    @param adaptive_rate: truthy (callers pass e.g. 0.1) to add the slope
        recovery term computed from the `A` attribute of the model's layers;
        None/False to skip it
    @param model: model, used to get the local recovery term; only accessed
        when adaptive_rate is truthy
    @param w1: weight for pde loss (fixed-weight mode)
    @param w2: weight for bc loss (fixed-weight mode)
    @param adaptive_weight: bool, whether to use adaptive weight or not
    @param x_f_s: tensor; exp(-x_f_s) weights the pde loss in adaptive-weight
        mode (ignored otherwise)
    @param x_label_s: tensor; exp(-x_label_s) weights the bc loss in
        adaptive-weight mode (ignored otherwise)
    @return: (loss, mse_loss) - the objective to back-propagate and the
        unweighted sum pde_loss + bc_loss used for reporting
    '''
    # Weighting and slope recovery are independent options. They used to be an
    # if/elif, so adaptive weights were silently ignored whenever adaptive_rate
    # was set.
    if adaptive_weight:
        # The weights are detached: they are trained by their own optimizer.
        loss = torch.exp(-x_f_s.detach()) * pde_loss + torch.exp(-x_label_s.detach()) * bc_loss
    else:
        loss = w1 * pde_loss + w2 * bc_loss

    if adaptive_rate:
        # Built with torch.stack on the live tensors (not `.data` wrapped in
        # torch.tensor) so the term stays in the autograd graph and on the
        # model's device; otherwise it is a constant with no training effect.
        local_recovery_terms = torch.stack([
            torch.mean(model.regressor[layer][0].A)
            for layer in range(len(model.regressor) - 1)
        ])
        slope_recovery_term = 1 / torch.mean(torch.exp(local_recovery_terms))
        loss = loss + slope_recovery_term

    mse_loss = pde_loss + bc_loss
    return loss, mse_loss

In [None]:
# Make sure the INFO-level messages below are actually emitted: the root
# logger defaults to WARNING, which silently drops logging.info() calls.
logging.basicConfig(level=logging.INFO)

# initialize one Adam optimizer per network, all with the same learning rate
optimizer1 = utils.optimizer_dispatcher('adam', pinn.parameters(), lr)
optimizer2 = utils.optimizer_dispatcher('adam', wpinn.parameters(), lr)
optimizer3 = utils.optimizer_dispatcher('adam', awpinn.parameters(), lr)

optimizer4 = utils.optimizer_dispatcher('adam', ipinn.parameters(), lr)
optimizer5 = utils.optimizer_dispatcher('adam', wipinn.parameters(), lr)
optimizer6 = utils.optimizer_dispatcher('adam', awipinn.parameters(), lr)


# adaptive weight (awpinn): trainable log-weights for the pde / bc losses
x_f_s1 = torch.tensor(0.).float().to(device).requires_grad_(True)
x_label_s1 = torch.tensor(0.).float().to(device).requires_grad_(True)
optimizer_adam_weight1 = torch.optim.Adam([x_f_s1] + [x_label_s1], lr=0.0003)

# adaptive weight (awipinn)
x_f_s2 = torch.tensor(0.).float().to(device).requires_grad_(True)
x_label_s2 = torch.tensor(0.).float().to(device).requires_grad_(True)
optimizer_adam_weight2 = torch.optim.Adam([x_f_s2] + [x_label_s2], lr=0.0003)

# per-model history of the unweighted loss (pde_loss + bc_loss)
loss_hist1 = []
loss_hist2 = []
loss_hist3 = []
loss_hist4 = []
loss_hist5 = []
loss_hist6 = []

log_loss_hist = []

# One entry per network:
#   (model, optimizer, adaptive_rate, adaptive-weight state or None, history)
# where the adaptive-weight state is (x_f_s, x_label_s, weight_optimizer).
# NOTE(review): pinn and wpinn are configured identically (both use w1/w2);
# if pinn is meant to be the unweighted baseline it needs weights of 1 - confirm.
training_runs = [
    (pinn, optimizer1, None, None, loss_hist1),
    (wpinn, optimizer2, None, None, loss_hist2),
    (awpinn, optimizer3, None, (x_f_s1, x_label_s1, optimizer_adam_weight1), loss_hist3),
    (ipinn, optimizer4, 0.1, None, loss_hist4),
    (wipinn, optimizer5, 0.1, None, loss_hist5),
    (awipinn, optimizer6, 0.1, (x_f_s2, x_label_s2, optimizer_adam_weight2), loss_hist6),
]

# logging.info(f'{model}\n')
logging.info(f'Training started at {datetime.datetime.now()}\n')
start_time = timer()

# training loop
for _ in tqdm.tqdm(range(n_epochs), desc='[Training procedure]', ascii=True, total=n_epochs):

    # NOTE(review): the sampler is called every epoch with the same RNG_key;
    # if utils.trainingData is deterministic for a given key this returns
    # identical points each time and could be hoisted out of the loop - confirm.
    n_st_train, n_v_train, bc_st_train, bc_v_train = data_sampling(K, r, sigma, T, S_range[-1], S_range, t_range, gs, samples['bc'], samples['fc'], samples['pde'], RNG_key=123)

    for model, optimizer, adaptive_rate, aw_state, history in training_runs:
        pde_loss, bc_loss = loss_fn(model, bc_st_train, bc_v_train, n_st_train, n_v_train, lossFunction)

        if aw_state is None:
            loss, mse_loss = loss_dispatcher(pde_loss, bc_loss, adaptive_rate, model, w1, w2, False, 0, 0)
        else:
            x_f_s, x_label_s, weight_optimizer = aw_state
            loss, mse_loss = loss_dispatcher(pde_loss, bc_loss, adaptive_rate, model, w1, w2, True, x_f_s, x_label_s)

        # update the network parameters
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        history.append(mse_loss.item())

        # update the adaptive loss weights using the (detached) losses of this
        # step; the "+ x" terms keep the weights from growing without bound
        if aw_state is not None:
            weight_optimizer.zero_grad()
            awloss = torch.exp(-x_f_s) * pde_loss.detach() + x_f_s + torch.exp(-x_label_s) * bc_loss.detach() + x_label_s
            awloss.backward()
            weight_optimizer.step()


elapsed = timer() - start_time
logging.info(f'Training finished. Elapsed time: {elapsed} s\n')

# Plot


The numeric suffix of each `loss_hist` list identifies the model:

- 1: pinn
- 2: wpinn
- 3: awpinn
- 4: ipinn
- 5: wipinn
- 6: awipinn

In [None]:
# Log of the recorded loss over (at most) the first 3000 epochs.
# The window is clipped to the history length so the cell also works when
# fewer epochs were trained.
n_shown = min(3000, len(loss_hist1))
epochs = range(n_shown)

fig = plt.figure(figsize=(12,16))
plt.plot(epochs, np.log(loss_hist1[:n_shown]), color='blue', label='pinn')
# plt.plot(epochs, np.log(loss_hist2[:n_shown]), label='wpinn')
plt.plot(epochs, np.log(loss_hist3[:n_shown]), color='green', label='awpinn')
plt.plot(epochs, np.log(loss_hist4[:n_shown]), color='red', label='ipinn')
plt.xlabel('epochs')
# The curves are log-transformed, so label the axis accordingly.
plt.ylabel('log(loss)')
plt.legend()
plt.title(f'Speed of convergence: w1: {w1}, w2: {w2}')
plt.grid(True)

In [None]:
# Log of the recorded loss over (at most) the first 6000 epochs.
# The window is clipped to the history length so the cell also works when
# fewer epochs were trained.
n_shown = min(6000, len(loss_hist1))
epochs = range(n_shown)

fig = plt.figure(figsize=(12,12))
plt.plot(epochs, np.log(loss_hist1[:n_shown]), color='blue', label='pinn')
# plt.plot(epochs, np.log(loss_hist2[:n_shown]), color='red', label='wpinn')
plt.plot(epochs, np.log(loss_hist3[:n_shown]), color='green', label='awpinn')
plt.plot(epochs, np.log(loss_hist4[:n_shown]), label='ipinn')
# plt.plot(epochs, np.log(loss_hist5[:n_shown]), label='wipinn')
plt.plot(epochs, np.log(loss_hist6[:n_shown]), label='awipinn')
plt.xlabel('epochs')
# The curves are log-transformed, so label the axis accordingly.
plt.ylabel('log(loss)')
plt.legend()
plt.title(f'Speed of convergence: w1: {w1}, w2: {w2}')
plt.grid(True)

In [None]:
# Raw (linear-scale) loss over at most the first 1000 epochs.
# The window is clipped to the history length so the cell also works when
# fewer epochs were trained.
n_shown = min(1000, len(loss_hist1))
epochs = range(n_shown)

fig = plt.figure(figsize=(24,16))
plt.plot(epochs, loss_hist1[:n_shown], color='blue', label='pinn')
# plt.plot(epochs, loss_hist2[:n_shown], color='red', label='wpinn')
plt.plot(epochs, loss_hist3[:n_shown], color='black', label='awpinn')
plt.plot(epochs, loss_hist4[:n_shown], label='ipinn')
# plt.plot(epochs, loss_hist5[:n_shown], label='wipinn')
plt.plot(epochs, loss_hist6[:n_shown], label='awipinn')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.title(f'Speed of convergence: w1: {w1}, w2: {w2}')
plt.grid(True)

In [None]:
# Raw loss after the initial transient: everything from epoch 1000 onwards.
# The x-axis carries the true epoch index (it used to restart at 0), and the
# range follows the history length instead of assuming exactly 30000 epochs.
skip = 1000
epochs = range(skip, len(loss_hist1))

fig = plt.figure(figsize=(24,12))
plt.plot(epochs, loss_hist1[skip:], color='blue', label='pinn')
# plt.plot(epochs, loss_hist2[skip:], color='red', label='wpinn')
plt.plot(epochs, loss_hist3[skip:], color='black', label='awpinn')
plt.plot(epochs, loss_hist4[skip:], label='ipinn')
# plt.plot(epochs, loss_hist5[skip:], label='wipinn')
plt.plot(epochs, loss_hist6[skip:], label='awipinn')
plt.xlabel('epochs')
plt.ylabel('loss')
plt.legend()
plt.title('Minimum loss')
plt.grid(True)

In [None]:
# fig = plt.figure(figsize=(24,12))
# plt.plot(range(30000), [np.log(item) for item in loss_list[:30000]], color='blue', label='pinn')
# plt.plot(range(30000), [np.log(item) for item in loss_list2[:30000]], color='red', label='ipinn')
# plt.plot(range(30000), [np.log(item) for item in loss_list3[:30000]], color='green', label='awpinn')
# plt.plot(range(30000), [np.log(item) for item in loss_list4[:30000]], color='yellow', label='wpinn')
# plt.xlabel('epochs')
# plt.ylabel('loss')
# plt.legend()
# plt.title(f'Speed of convergence: w1: {w1}, w2: {w2}')
# plt.grid(True)