In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import copy

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import grad

In [2]:
table = pd.read_csv('Heston_data_input')
X = table.drop(['C_price','delta'], axis=1)
y = table[['C_price']]
#X = df[['k','T','C_price']]
#y = df[['v0','rho','kappa','theta','sigma']]
T_loc = list(X.columns).index('T')
k_loc = list(X.columns).index('k') # log strike 

In [3]:
table['C_price'].describe()

count    63.000000
mean      0.285844
std       0.134396
min       0.005952
25%       0.251236
50%       0.289855
75%       0.381712
max       0.452502
Name: C_price, dtype: float64

In [4]:
X_train, X_test, y_train, y_test = train_test_split(
...     X, y, test_size=0.2, random_state=42)
input_scaler = preprocessing.MinMaxScaler()
output_scaler = preprocessing.MinMaxScaler()
X_train_scaled = input_scaler.fit_transform(X_train)
#y_train_scaled = output_scaler.fit_transform(y_train)
#y_train_scaled = y_train # no scaling
#X_train_scaled = X_train_scaled.reshape(X_train_scaled.shape[0],X_train_scaled.shape[1],1) # for lstm
X_test_scaled = input_scaler.transform(X_test)
#y_test_scaled = y_test # no scaling

#X_train_scaled = torch.FloatTensor(X_train_scaled,requires_grad=True)
#X_test_scaled = torch.FloatTensor(X_test_scaled,requires_grad=True)
X_train_scaled = torch.FloatTensor(X_train_scaled)
X_train_scaled.requires_grad = True
X_test_scaled = torch.FloatTensor(X_test_scaled)
X_test_scaled.requires_grad = True
y_train = torch.FloatTensor(y_train.values)
y_test = torch.FloatTensor(y_test.values)

In [5]:

num_neurons = 128


class Net(nn.Module):

    def __init__(self,num_input=7):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(num_input, num_neurons) 
        self.fc2 = nn.Linear(num_neurons, num_neurons)
        self.fc3 = nn.Linear(num_neurons, num_neurons)
#        self.fc4 = nn.Linear(num_neurons, num_neurons)
#        self.fc5 = nn.Linear(num_neurons, num_neurons)
#        self.fc6 = nn.Linear(num_neurons, num_neurons)
        self.fc7 = nn.Linear(num_neurons, 1)

    def forward(self, x):
        # Max pooling over a (2, 2) window
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.tanh(self.fc3(x))
#        x = F.relu(self.fc3(x))
#        x = F.relu(self.fc4(x))
#        x = F.relu(self.fc5(x))
#        x = F.relu(self.fc6(x))
        x = F.relu(self.fc7(x))
        return x

def weights_init(m):
    if isinstance(m, nn.Linear):
        torch.nn.init.xavier_uniform_(m.weight)
#        torch.nn.init.normal_(m.weight)
#        xavier(m.weight.data)
#        xavier(m.bias.data)


In [6]:
model = Net()
model.apply(weights_init)
        
print(model)

Net(
  (fc1): Linear(in_features=7, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=128, bias=True)
  (fc3): Linear(in_features=128, out_features=128, bias=True)
  (fc7): Linear(in_features=128, out_features=1, bias=True)
)


# Training

In [None]:
epochs = 30001
#epochs = 101
loss_arr = []
optimizer = optim.Adam(model.parameters(), lr=1e-5, weight_decay=1e-3)
l2_lambda = 1e-5
criterion = nn.MSELoss()


for i in range(epochs):
    y_hat = model.forward(X_train_scaled)
    loss = criterion(y_hat, y_train)
    l2_reg = torch.tensor(0.)
    
    # Penalization (loop over each data)
    ## calendar arbitrage
    calendar_arbi_count = 0
    calendar_loss = torch.tensor(0.)
    butterfly_loss = torch.tensor(0.)
    for elem in X_train_scaled:
        y= model.forward(elem)
        dydx = grad(y, elem, create_graph = True)[0]
        dydT = dydx[T_loc]
        if dydT < 0.0:
            calendar_arbi_count += 1
            calendar_loss += torch.exp(-dydT) * 1e-1
    loss += calendar_loss
    ## butterfly arbitrage
    butterfly_count = 0
    dydk = dydx[k_loc] # dCdk w.r.t. log-strike 
    d2ydk2 = grad(dydx[k_loc],elem, create_graph = True)[0][k_loc] # d2Cdk2 w.r.t. log-strike
    # butterfly arbitrage: d2CdK2 = 1/K^2(d2Cdk2 - dCdk) > 0, d2Cdk2 > dCdk
    # dCdK = 1/K*dcdk
    # d2CdK2 = 1/e^2k * (d2Cdk2 - dCdk)
    butter_ineq = (d2ydk2 - dydk)/torch.exp(2.0*elem[k_loc])
    if butter_ineq < 0.0: # violation of butterfly arbitrage
        butterfly_count += 1
        butterfly_loss += torch.exp(-butter_ineq) * 1e-1    
    loss += butterfly_loss

    # regularizations
    for param in model.parameters():
        l2_reg += torch.norm(param)
        loss += l2_lambda * l2_reg
    loss_arr.append(loss)
    if calendar_arbi_count > 0: print('calendar ',i, ' ',calendar_arbi_count,' ',calendar_loss)
    if butterfly_count > 0: print('butterfly ',i, ' ',butterfly_count,' ',butterfly_loss)
 
    if i % 500 == 0:
        print(f'Epoch: {i} Loss: {loss}')
 
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

calendar 



 0   39   tensor(4.0659, grad_fn=<AddBackward0>)
Epoch: 0 Loss: 4.122244834899902
calendar  1   39   tensor(4.0631, grad_fn=<AddBackward0>)
calendar  2   38   tensor(3.9604, grad_fn=<AddBackward0>)
calendar  3   36   tensor(3.7578, grad_fn=<AddBackward0>)
calendar  4   36   tensor(3.7561, grad_fn=<AddBackward0>)
calendar  5   36   tensor(3.7534, grad_fn=<AddBackward0>)
calendar  6   36   tensor(3.7509, grad_fn=<AddBackward0>)
calendar  7   35   tensor(3.6486, grad_fn=<AddBackward0>)
calendar  8   35   tensor(3.6462, grad_fn=<AddBackward0>)
calendar  9   34   tensor(3.5438, grad_fn=<AddBackward0>)
calendar  10   33   tensor(3.4415, grad_fn=<AddBackward0>)
calendar  11   33   tensor(3.4393, grad_fn=<AddBackward0>)
calendar  12   33   tensor(3.4371, grad_fn=<AddBackward0>)
calendar  13   33   tensor(3.4349, grad_fn=<AddBackward0>)
calendar  14   33   tensor(3.4326, grad_fn=<AddBackward0>)
calendar  15   33   tensor(3.4305, grad_fn=<AddBackward0>)
calendar  16   33   tensor(3.4284, grad_fn

calendar  139   10   tensor(1.0315, grad_fn=<AddBackward0>)
calendar  140   10   tensor(1.0311, grad_fn=<AddBackward0>)
calendar  141   10   tensor(1.0317, grad_fn=<AddBackward0>)
calendar  142   10   tensor(1.0313, grad_fn=<AddBackward0>)
calendar  143   10   tensor(1.0309, grad_fn=<AddBackward0>)
calendar  144   10   tensor(1.0305, grad_fn=<AddBackward0>)
calendar  145   10   tensor(1.0305, grad_fn=<AddBackward0>)
calendar  146   10   tensor(1.0301, grad_fn=<AddBackward0>)
calendar  147   10   tensor(1.0298, grad_fn=<AddBackward0>)
calendar  148   10   tensor(1.0291, grad_fn=<AddBackward0>)
calendar  149   10   tensor(1.0287, grad_fn=<AddBackward0>)
calendar  150   10   tensor(1.0283, grad_fn=<AddBackward0>)
calendar  151   10   tensor(1.0282, grad_fn=<AddBackward0>)
calendar  152   10   tensor(1.0279, grad_fn=<AddBackward0>)
calendar  153   9   tensor(0.9272, grad_fn=<AddBackward0>)
calendar  154   9   tensor(0.9269, grad_fn=<AddBackward0>)
calendar  155   9   tensor(0.9268, grad_fn

calendar  280   3   tensor(0.3034, grad_fn=<AddBackward0>)
calendar  281   3   tensor(0.3034, grad_fn=<AddBackward0>)
calendar  282   3   tensor(0.3033, grad_fn=<AddBackward0>)
calendar  283   3   tensor(0.3032, grad_fn=<AddBackward0>)
calendar  284   3   tensor(0.3032, grad_fn=<AddBackward0>)
calendar  285   3   tensor(0.3031, grad_fn=<AddBackward0>)
calendar  286   3   tensor(0.3030, grad_fn=<AddBackward0>)
calendar  287   3   tensor(0.3030, grad_fn=<AddBackward0>)
calendar  288   3   tensor(0.3024, grad_fn=<AddBackward0>)
calendar  289   2   tensor(0.2024, grad_fn=<AddBackward0>)
calendar  290   2   tensor(0.2023, grad_fn=<AddBackward0>)
calendar  291   3   tensor(0.3026, grad_fn=<AddBackward0>)
calendar  292   3   tensor(0.3026, grad_fn=<AddBackward0>)
calendar  293   3   tensor(0.3025, grad_fn=<AddBackward0>)
calendar  294   3   tensor(0.3025, grad_fn=<AddBackward0>)
calendar  295   2   tensor(0.2022, grad_fn=<AddBackward0>)
calendar  296   2   tensor(0.2021, grad_fn=<AddBackward0

calendar  422   1   tensor(0.1004, grad_fn=<AddBackward0>)
calendar  423   1   tensor(0.1003, grad_fn=<AddBackward0>)
calendar  424   1   tensor(0.1003, grad_fn=<AddBackward0>)
calendar  425   1   tensor(0.1003, grad_fn=<AddBackward0>)
calendar  426   1   tensor(0.1003, grad_fn=<AddBackward0>)
calendar  427   1   tensor(0.1003, grad_fn=<AddBackward0>)
calendar  428   1   tensor(0.1003, grad_fn=<AddBackward0>)
calendar  429   1   tensor(0.1003, grad_fn=<AddBackward0>)
calendar  430   1   tensor(0.1002, grad_fn=<AddBackward0>)
calendar  431   1   tensor(0.1002, grad_fn=<AddBackward0>)
calendar  432   1   tensor(0.1002, grad_fn=<AddBackward0>)
calendar  433   1   tensor(0.1002, grad_fn=<AddBackward0>)
calendar  434   1   tensor(0.1002, grad_fn=<AddBackward0>)
calendar  435   1   tensor(0.1002, grad_fn=<AddBackward0>)
calendar  436   1   tensor(0.1002, grad_fn=<AddBackward0>)
calendar  437   1   tensor(0.1001, grad_fn=<AddBackward0>)
calendar  438   1   tensor(0.1001, grad_fn=<AddBackward0

calendar  868   1   tensor(0.1000, grad_fn=<AddBackward0>)
calendar  871   1   tensor(0.1000, grad_fn=<AddBackward0>)
calendar  878   1   tensor(0.1000, grad_fn=<AddBackward0>)
calendar  882   1   tensor(0.1006, grad_fn=<AddBackward0>)
calendar  883   1   tensor(0.1006, grad_fn=<AddBackward0>)
calendar  884   1   tensor(0.1006, grad_fn=<AddBackward0>)
calendar  885   1   tensor(0.1005, grad_fn=<AddBackward0>)
calendar  886   1   tensor(0.1005, grad_fn=<AddBackward0>)
calendar  887   1   tensor(0.1005, grad_fn=<AddBackward0>)
calendar  888   1   tensor(0.1005, grad_fn=<AddBackward0>)
calendar  889   1   tensor(0.1005, grad_fn=<AddBackward0>)
calendar  890   1   tensor(0.1005, grad_fn=<AddBackward0>)
calendar  891   1   tensor(0.1004, grad_fn=<AddBackward0>)
calendar  892   1   tensor(0.1004, grad_fn=<AddBackward0>)
calendar  893   2   tensor(0.2008, grad_fn=<AddBackward0>)
calendar  894   2   tensor(0.2008, grad_fn=<AddBackward0>)
calendar  895   2   tensor(0.2008, grad_fn=<AddBackward0

calendar  1226   1   tensor(0.1009, grad_fn=<AddBackward0>)
calendar  1227   1   tensor(0.1009, grad_fn=<AddBackward0>)
calendar  1228   1   tensor(0.1009, grad_fn=<AddBackward0>)
calendar  1229   1   tensor(0.1008, grad_fn=<AddBackward0>)
calendar  1230   1   tensor(0.1008, grad_fn=<AddBackward0>)
calendar  1231   1   tensor(0.1008, grad_fn=<AddBackward0>)
calendar  1232   1   tensor(0.1007, grad_fn=<AddBackward0>)
calendar  1233   1   tensor(0.1007, grad_fn=<AddBackward0>)
calendar  1234   1   tensor(0.1007, grad_fn=<AddBackward0>)
calendar  1235   1   tensor(0.1006, grad_fn=<AddBackward0>)
calendar  1236   1   tensor(0.1006, grad_fn=<AddBackward0>)
calendar  1237   1   tensor(0.1006, grad_fn=<AddBackward0>)
calendar  1238   1   tensor(0.1005, grad_fn=<AddBackward0>)
calendar  1239   1   tensor(0.1005, grad_fn=<AddBackward0>)
calendar  1240   1   tensor(0.1005, grad_fn=<AddBackward0>)
calendar  1241   1   tensor(0.1004, grad_fn=<AddBackward0>)
calendar  1242   1   tensor(0.1004, grad

In [None]:
plt.plot(loss_arr)
plt.yscale('log')

In [None]:
inp = X_train_scaled[0].clone().detach()
inp.requires_grad = True

In [None]:
out=model.forward(inp)

In [None]:
out.backward()

In [None]:
inp.grad

In [None]:
inp1 = X_train_scaled[0].clone().detach()
dx = 1e-3
inp1[0] = torch.add(inp1[0],dx)

In [None]:
out1= model.forward(inp1)
dy = out1 - out
print(dy)

In [None]:
dydx = dy/dx

In [None]:
# 2nd order diff
x = []
ind = 0
tmp = X_train_scaled[0].clone().detach(); tmp.requires_grad = True
tmp[ind] -= dx
x.append(tmp)
tmp = X_train_scaled[0].clone().detach(); tmp.requires_grad = True
x.append(tmp)
tmp = X_train_scaled[0].clone().detach(); tmp.requires_grad = True
tmp[ind] += dx
x.append(tmp)
y = []
for elem in x:
    y.append(model.forward(elem))

dydx_fd = (y[2] - y[1]) / dx
d2ydx2_fd = (y[2] + y[0] - 2.*y[1]) / dx**2
print('1st diff FD ',dydx_fd)
print('2nd diff FD ',d2ydx2_fd)

# autograd

In [None]:
x = X_train_scaled[0].clone().detach()
x.requires_grad = True
y= model.forward(x)
dydx = grad(y,x, create_graph = True)[0]

In [None]:
if dydx[T_loc] < 0.0:
    print('nono ',torch.exp(dydx[T_loc] ))

In [None]:
d2ydx2 = grad(dydx[T_loc],x, create_graph = True)[0]

In [None]:
d2ydx2

In [None]:
x = X_train_scaled.clone().detach()
x.requires_grad = True
y= model.forward(x)

In [None]:
calendar_loss = torch.tensor(0.)
butterfly_loss = torch.tensor(0.)
for elem in X_train_scaled:
    y= model.forward(elem)
    dydx = grad(y, elem, create_graph = True)[0]
    dydT = dydx[T_loc]
    if dydT < 0.0:
        calendar_arbi_count += 1
        calendar_loss += torch.exp(-dydT[T_loc]) * 1e-1
    dydk = dydx[k_loc] # dCdk w.r.t. log-strike 
    d2ydk2 = grad(dydx[k_loc],elem, create_graph = True)[0][k_loc] # d2Cdk2 w.r.t. log-strike
    # dCdK = 1/K*dcdk
    # d2CdK2 = 1/e^2k * (d2Cdk2 - dCdk)
    # butterfly arbitrage: d2CdK2 = 1/K^2(d2Cdk2 - dCdk) > 0, d2Cdk2 > dCdk
    if d2ydk2 - dydk < 0.0: # violation of butterfly arbitrage
        butterfly_loss += torch.exp(-(d2ydk2 - dydk)) * 1e-1

In [None]:
dydk.requires_grad

In [None]:
x.requires_grad

In [None]:
if d2ydk2 < 0:
    print('shit')