In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as tgrad
import numpy as np


A direct copy from: https://medium.com/@andeyharsha15/deep-neural-networks-for-solving-differential-equations-in-finance-da662ef0681

# The Black Scholes Formula
The Black–Scholes formula calculates the price of European put and call options. This price is consistent with the Black–Scholes equation. This follows since the formula can be obtained by solving the equation for the corresponding terminal and boundary conditions:
$$    

    {\begin{aligned}&C(0,t)=0{\text{ for all }}t\\&C(S,t)\rightarrow S-K{\text{ as }}S\rightarrow \infty \\&C(S,T)=\max\{S-K,0\}\end{aligned}}

    $$

The value of a call option for a non-dividend-paying underlying stock in terms of the Black–Scholes parameters is:

$$
    {\begin{aligned}C(S_{t},t)&=N(d_{+})S_{t}-N(d_{-})Ke^{-r(T-t)}\\d_{+}&={\frac {1}{\sigma {\sqrt {T-t}}}}\left[\ln \left({\frac {S_{t}}{K}}\right)+\left(r+{\frac {\sigma ^{2}}{2}}\right)(T-t)\right]\\d_{-}&=d_{+}-\sigma {\sqrt {T-t}}\\\end{aligned}}
    $$

The price of a corresponding put option based on put–call parity with discount factor $e^{{-r(T-t)}}$ is:
$$
    {\begin{aligned}P(S_{t},t)&=Ke^{-r(T-t)}-S_{t}+C(S_{t},t)\\&=N(-d_{-})Ke^{-r(T-t)}-N(-d_{+})S_{t}\end{aligned}}\,
    $$

In [13]:
S = torch.Tensor([80]).requires_grad_()
t = torch.Tensor([0]).requires_grad_()
sigma = torch.Tensor([0.3]).requires_grad_()
r = torch.Tensor([0.05]).requires_grad_()
K = torch.Tensor([70])
T = torch.Tensor([1])
t2m = T-t
d1 = (torch.log(S / K) + (r + 0.5 * sigma**2) * t2m)/(sigma * torch.sqrt(t2m))
d2 = d1 - sigma * torch.sqrt(t2m)
N0 = lambda value: 0.5 * (1 + torch.erf((value/2**0.5)))
Nd1 = N0(d1)
Nd2 = N0(d2)
C = S* Nd1 - K* Nd2 *torch.exp(-r*t2m)
print("Option Price:", C.item()) #17.01496

Option Price: 17.01496124267578


# The Black Scholes Equation
The gradient is calculated using the auto gradient method in pytorch.

$
{\frac {\partial V}{\partial t}}+{\frac {1}{2}}\sigma ^{2}S^{2}{\frac {\partial ^{2}V}{\partial S^{2}}}+rS{\frac {\partial V}{\partial S}}-rV=0
$

To check the correctness of the calculation, it uses the Greeks equation from the Black-Scholes Formula.

In [14]:
dCdt, = tgrad.grad(C, t, grad_outputs=torch.ones(C.shape), create_graph=True, only_inputs=True)
dCdS, = tgrad.grad(C, S, grad_outputs=torch.ones(C.shape), create_graph=True, only_inputs=True)
d2CdS2, = tgrad.grad(dCdS, S, grad_outputs=torch.ones(dCdS.shape), create_graph=True, only_inputs=True)
dCdvol, = tgrad.grad(C, sigma, grad_outputs=torch.ones(C.shape), create_graph=True, only_inputs=True)

dCdr, = tgrad.grad(C, r, grad_outputs=torch.ones(C.shape), create_graph=True, only_inputs=True)
theta, delta, gamma, vega, rho = -dCdt[0], dCdS[0], d2CdS2[0], dCdvol[0], dCdr[0]

for og in [theta, delta, gamma, vega, rho]:
    print(f'{og.item():.4f}')

    # Theta 5.8385
    # Delta 0.7769
    # Gamma 0.0124
    # Vega 23.8776
    # Rho 45.1372

    print((-theta + 0.5*sigma**2 * S**2*gamma + r*S*delta - r*C).item())

5.8385
0.0
0.7769
0.0
0.0124
0.0
23.8776
0.0
45.1372
0.0


# Data Sampling
Here in our case, the system is European Call Option PDE and the physical information about the system consists of Boundary Value conditions, Initial Value conditions and the PDE itself.

The data samples are generated by the three functions:

1.    Sampler of data inputs for t and S for Differential Loss get_diff_data()
2.    Sampler of data inputs satisfying the boundary conditions for the PDE get_bvp_data()
3.    Sampler of data inputs satisfying the initial value conditions for the PDE get_ivp_data()



In [15]:
K = 40
r = 0.05
sigma = 0.25
T = 1
S_range = [0, 130]
t_range = [0, T]
gs = lambda x: np.fmax(x-K, 0)


def get_diff_data(n):
    X = np.concatenate([np.random.uniform(*t_range, (n, 1)), 
                        np.random.uniform(*S_range, (n, 1))], axis=1)
    y = np.zeros((n, 1))
    return X, y

def get_ivp_data(n):
    X = np.concatenate([np.ones((n, 1)),
                    np.random.uniform(*S_range, (n, 1))], axis=1)
    y = gs(X[:, 1]).reshape(-1, 1)
    
    return X, y
  
def get_bvp_data(n):
    X1 = np.concatenate([np.random.uniform(*t_range, (n, 1)),
                        S_range[0] * np.ones((n, 1))], axis=1)
    y1 = np.zeros((n, 1))
    
    X2 = np.concatenate([np.random.uniform(*t_range, (n, 1)), 
                        S_range[-1] * np.ones((n, 1))], axis=1)
    y2 = (S_range[-1] - K*np.exp(-r*(T-X2[:, 0].reshape(-1)))).reshape(-1, 1)
    
    return X1, y1, X2, y2

# Deep Galerkin Method Model Construction

The Neural Network Model based on https://arxiv.org/abs/1708.07469?context=q-fin.MF

In [16]:
class DGMCell(nn.Module):
  def __init__(self, input_dim, hidden_dim, n_layers=3, output_dim=1):
    super(DGMCell, self).__init__()
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.output_dim = output_dim
    self.n = n_layers

    self.sig_act = nn.Tanh()

    self.Sw = nn.Linear(self.input_dim, self.hidden_dim)

    self.Uz = nn.Linear(self.input_dim, self.hidden_dim)
    self.Wsz = nn.Linear(self.hidden_dim, self.hidden_dim)

    self.Ug = nn.Linear(self.input_dim, self.hidden_dim)
    self.Wsg = nn.Linear(self.hidden_dim, self.hidden_dim)

    self.Ur = nn.Linear(self.input_dim, self.hidden_dim)
    self.Wsr = nn.Linear(self.hidden_dim, self.hidden_dim)
    
    self.Uh = nn.Linear(self.input_dim, self.hidden_dim)
    self.Wsh = nn.Linear(self.hidden_dim, self.hidden_dim)

    self.Wf = nn.Linear(hidden_dim, output_dim)
    

  def forward(self, x):
    S1 = self.Sw(x)
    for i in range(self.n):
      if i==0:
        S = S1
      else:
        S = self.sig_act(out)
      Z = self.sig_act(self.Uz(x) + self.Wsz(S))
      G = self.sig_act(self.Ug(x) + self.Wsg(S1))
      R = self.sig_act(self.Ur(x) + self.Wsr(S))
      H = self.sig_act(self.Uh(x) + self.Wsh(S*R))
      out = (1-G)*H + Z*S
    out = self.Wf(out)
    return out

model = DGMCell(2, 100, 3, 1)
model.cuda()

n_epochs = 60000
samples = {"pde": 5000, "bvp":5000, "ivp":5000}
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=3e-5)

# Modelling

- For each iteration in the training loop, we are sampling data for the three physical conditions of the PDE.
- Then we are calculating the loss three times on the same model, accumulating them into a combined objective function to be minimised for the Neural Network.
- The first loss is the differential equation loss. Here we are trying to minimise the PDE by calculating gradients and forming the PDE itself.
- The remaining losses are calculated for boundary value and initial value conditions for the PDE.
- Mean Squared Error loss function nn.MSELoss() is chosen as the criterion to be minimised and Adam optimizer nn.optim.Adam(lr=3e-5)with a learning rate of 0.00003 is chosen for performing the weight updates.

In [17]:
loss_hist = []

for epoch in range(n_epochs):
    
    # PDE Round
    X1, y1 = get_diff_data(samples['pde'])
    X1 = torch.from_numpy(X1).float().requires_grad_().cuda()
    y1 = torch.from_numpy(y1).float().cuda()
    
    y1_hat = model(X1)
    
    grads = tgrad.grad(y1_hat, X1, grad_outputs=torch.ones(y1_hat.shape).cuda(), retain_graph=True, create_graph=True, only_inputs=True)[0]
    dVdt, dVdS = grads[:, 0].view(-1, 1), grads[:, 1].view(-1, 1)
    grads2nd = tgrad.grad(dVdS, X1, grad_outputs=torch.ones(dVdS.shape).cuda(), create_graph=True, only_inputs=True)[0]
    d2VdS2 = grads2nd[:, 1].view(-1, 1)
    S1 = X1[:, 1].view(-1, 1)
    pde_loss = criterion(-dVdt, 0.5*((sigma*S1)**2)*d2VdS2 + r*S1*dVdS - r*y1_hat)
    
    
    # BVP Round
    X21, y21, X22, y22 = get_bvp_data(samples['bvp'])
    
    X21 = torch.from_numpy(X21).float().cuda()
    y21 = torch.from_numpy(y21).float().cuda()
    
    X22 = torch.from_numpy(X22).float().cuda()
    y22 = torch.from_numpy(y22).float().cuda()
    
    y21_hat = model(X21)
    bvp1_loss = criterion(y21, y21_hat)
    
    y22_hat = model(X22)
    bvp2_loss = criterion(y22, y22_hat)
    
    
    # IVP Round
    X3, y3 = get_ivp_data(samples['ivp'])
    
    X3 = torch.from_numpy(X3).float().cuda()
    y3 = torch.from_numpy(y3).float().cuda()
    
    y3_hat = model(X3)
    ivp_loss = criterion(y3, y3_hat)
    
    # Backpropagation and Update
    optimizer.zero_grad()
    combined_loss = pde_loss + bvp1_loss + bvp2_loss + ivp_loss
    combined_loss.backward()
    optimizer.step()
    
    loss_hist.append(combined_loss.item())
    if epoch % 500 == 0:
        print(f'{epoch}/{n_epochs} PDE Loss: {pde_loss.item():.5f}, BVP1 Loss: {bvp1_loss.item():.5f}, BVP2 Loss: {bvp2_loss.item():.5f}, IVP Loss: {ivp_loss.item():.5f},')

0/60000 PDE Loss: 0.00088, BVP1 Loss: 0.26766, BVP2 Loss: 8182.09082, IVP Loss: 1837.09436,
500/60000 PDE Loss: 0.09695, BVP1 Loss: 0.00002, BVP2 Loss: 7612.56152, IVP Loss: 1659.96338,


KeyboardInterrupt: 

# American Put Option Valuation

In [None]:
K = 40
r = 0.05
sigma = 0.25
T = 1
S_range = [0, 130]
t_range = [0, T]
gs = lambda x, val: np.fmax(K-x, val)


def get_diff_data(n):
    X = np.concatenate([np.random.uniform(*t_range, (n, 1)), 
                        np.random.uniform(*S_range, (n, 1))], axis=1)
    y = np.zeros((n, 1))
    return X, y

def get_ivp_data(n):
    X = np.concatenate([np.ones((n, 1)),
                    np.random.uniform(*S_range, (n, 1))], axis=1)
    y = gs(X[:, 1], 0).reshape(-1, 1)
    
    return X, y
  
def get_bvp_data(n):
    X1 = np.concatenate([np.random.uniform(*t_range, (n, 1)),
                        S_range[-1] * np.ones((n, 1))], axis=1)
    y1 = np.zeros((n, 1))
    
    X2 = np.concatenate([np.random.uniform(*t_range, (n, 1)), 
                        S_range[0] * np.ones((n, 1))], axis=1)
    y2 = K*np.ones((n, 1))
    
    return X1, y1, X2, y2

In [None]:
# PDE Round
X1, y1 = get_diff_data(samples['pde'])
X1 = torch.from_numpy(X1).float().requires_grad_().cuda()
y1 = torch.from_numpy(y1).float().cuda()
y1_hat = model(X1)

grads = tgrad.grad(y1_hat, X1, grad_outputs=torch.ones(y1_hat.shape).cuda(), retain_graph=True, create_graph=True, only_inputs=True)[0]
dVdt, dVdS = grads[:, 0].view(-1, 1), grads[:, 1].view(-1, 1)
grads2nd = tgrad.grad(dVdS, X1, grad_outputs=torch.ones(dVdS.shape).cuda(), create_graph=True, only_inputs=True)[0]
d2VdS2 = grads2nd[:, 1].view(-1, 1)

S1 = X1[:, 1].view(-1, 1)
yint = torch.max(K - S1, torch.zeros_like(S1))

pde = (dVdt + 0.5*((sigma*S1)**2)*d2VdS2 + r*S1*dVdS - r*y1_hat)*(y1_hat - yint)
pde_loss = criterion(pde, torch.zeros_like(pde)) + criterion(torch.max(-y1_hat + yint, torch.zeros_like(yint)),  torch.zeros_like(yint))