In [1]:
# Commented out IPython magic to ensure Python compatibility.
# %%capture
#
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.io as sio
import keras
import keras.backend as K

from sklearn.model_selection import ParameterGrid
from py_vollib import black_scholes_merton as bsm
from progressbar import ProgressBar
from scipy.stats import gamma
from scipy.stats import beta
from scipy.stats import uniform
from keras.models import Model
from keras.layers import Input, Dense
from sklearn.model_selection import train_test_split

import scipy.sparse
from mpl_toolkits.mplot3d import Axes3D

import import_ipynb

from finite_difference_class import FDExplicitEu, FDImplicitEu, FDCnEu

importing Jupyter notebook from finite_difference_class.ipynb
4.072882278148043
-1.6291077072251005e+53
4.065801939431454
4.071594188049893
4.072254507998114
4.072238354486828


# Random data generation function

In [2]:
# S (spot price)
# gamma
def thisS(q):
    return gamma.ppf(q, a = 100, scale = 1)

# K (strike price)
# uniform (lower = 50, upper = 200)
def thisK(q):
    return uniform.ppf(q, 50, 200)

# (interest rate)
# uniform (lower = 0.01, upper = 0.18)
def thisR(q):
    return uniform.ppf(q, 0.01, 0.18)


# D (dividend)
# uniform (lower = 0.01, upper = 0.18)
def thisD(q):
    return 0
    # return uniform.ppf(q, 0.01, 0.18)

# t (time-to-maturity)
# t will be 3, 6, 9, 12 months for all examples (0.25, 0.5, 0.75, 1 year)

# sigma (volatility)
# beta (add small amount so volatility cannot be zero)
def thisSigma(q):
    return (beta.ppf(q, a = 2, b = 5) + 0.001)

# Full DataSet
A “as complete as possible” simulated dataset with almost a million observations. Due to constraints on computational resources, this was as a big a dataset we could simulate under reasonable circumstances.

In [3]:
num_increment = 12
percentiles = pd.Series(np.linspace(0.01, 0.99, num_increment))
print(percentiles)

0     0.010000
1     0.099091
2     0.188182
3     0.277273
4     0.366364
5     0.455455
6     0.544545
7     0.633636
8     0.722727
9     0.811818
10    0.900909
11    0.990000
dtype: float64


In [4]:
S = percentiles.apply(thisS).to_numpy()
# print(S)
K = percentiles.apply(thisK).to_numpy()
q = percentiles.apply(thisD).to_numpy()
t = np.array([.25, .5, .75, 1])
r = percentiles.apply(thisR).to_numpy()
sigma = percentiles.apply(thisSigma).to_numpy()

param_grid = {'S': S, 'K': K, 'q': q, 't': t, 'r': r, 'sigma': sigma}
grid = ParameterGrid(param_grid)
print(grid)
print(param_grid)

<sklearn.model_selection._search.ParameterGrid object at 0x00000184AB7180D0>
{'S': array([ 78.21598305,  87.37013918,  91.09674336,  93.88453878,
        96.29726243,  98.55429076, 100.78778343, 103.11419426,
       105.68194588, 108.7585023 , 113.06691449, 124.72256149]), 'K': array([ 52.        ,  69.81818182,  87.63636364, 105.45454545,
       123.27272727, 141.09090909, 158.90909091, 176.72727273,
       194.54545455, 212.36363636, 230.18181818, 248.        ]), 'q': array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64), 't': array([0.25, 0.5 , 0.75, 1.  ]), 'r': array([0.0118    , 0.02783636, 0.04387273, 0.05990909, 0.07594545,
       0.09198182, 0.10801818, 0.12405455, 0.14009091, 0.15612727,
       0.17216364, 0.1882    ]), 'sigma': array([0.02776319, 0.09311179, 0.13570038, 0.17347384, 0.20984291,
       0.24654192, 0.28496977, 0.32672565, 0.37420516, 0.43199458,
       0.51235236, 0.70668633])}


In [5]:
pbar = ProgressBar()
fullDF = pd.DataFrame()
prices = []
fdmprices= []
tmp = []
for params in pbar(grid):
    prices.append(bsm.black_scholes_merton(flag = 'p', S = params['S'], K = params['K'], q = params['q'], t = params['t'], r = params['r'], sigma = params['sigma']))
    
    option = FDImplicitEu(S0 = params['S'], K = params['K'], r = params['r'], T = params['t'], sigma = params['sigma'], Smax = int(5*params["K"]), M = 100, N = 100, is_call=False)
    fdmprices.append(option.price())
    tmp.append(pd.Series(params).to_frame().T)
    # fullDF = fullDF.append(pd.Series(params), ignore_index = True)
    pass
fullDF = pd.concat(tmp, ignore_index=True) # type: ignore

  5% |###                                                                     |

KeyboardInterrupt: 

In [None]:
# swap price to first column
fullDF['price'] = prices
fullDF['fdm-price'] = fdmprices

# output to csv
fullDF.to_csv('dataFull.csv', index = False)
print(fullDF.head())
print(fullDF.tail())

# Sparse DataSet
A “sparse” version of the full dataset. This set covered the same ranges for each of the parameters, but simply had fewer observations for each. This dataset ended up having 12,500 observations.

In [None]:
num_increment = 5
percentiles = pd.Series(np.linspace(0.01, 0.99, num_increment))

In [None]:
S = percentiles.apply(thisS).to_numpy()
K = percentiles.apply(thisK).to_numpy()
q = percentiles.apply(thisD).to_numpy()
t = np.array([0.25, 0.5, 0.75, 1])
r = percentiles.apply(thisR).to_numpy()
sigma = percentiles.apply(thisSigma).to_numpy()

param_grid = {'S': S, 'K' : K, 'q' : q, 't' : t, 'r' : r, 'sigma' : sigma}
grid = ParameterGrid(param_grid)

In [None]:
pbar = ProgressBar()
sparseDF = pd.DataFrame()
prices = []
fdmprices= []
tmp = []
for params in pbar(grid):
    prices.append(bsm.black_scholes_merton(flag = 'p', S = params['S'], K = params['K'], q = params['q'], t = params['t'],r = params['r'], sigma = params['sigma']))
    
    option = FDImplicitEu(S0 = params['S'], K = params['K'], r = params['r'], T = params['t'], sigma = params['sigma'], Smax = int(5*params["K"]), M = 100, N = 100, is_call=False)
    fdmprices.append(option.price())
    tmp.append(pd.Series(params).to_frame().T)
    # sparseDF = sparseDF.append(pd.Series(params), ignore_index = True)
    pass
# print(len(prices))
# print(len(tmp))
# print(tmp)
sparseDF = pd.concat(tmp, ignore_index=True) # type: ignore
# print(len(sparseDF))

100% |########################################################################|


In [None]:
# swap price to first column
sparseDF['price'] = prices
sparseDF['fdm-price'] = fdmprices

# output to csv
sparseDF.to_csv('dataSparse.csv', index = False)
print(sparseDF.head())
print(sparseDF.tail())

      K          S    q       r     sigma     t          price     fdm-price
0  52.0  78.215983  0.0  0.0118  0.027763  0.25  1.210258e-194  4.762734e-26
1  52.0  78.215983  0.0  0.0118  0.027763  0.50  2.595718e-100  8.553145e-23
2  52.0  78.215983  0.0  0.0118  0.027763  0.75   9.113920e-69  6.492417e-21
3  52.0  78.215983  0.0  0.0118  0.027763  1.00   6.009819e-53  1.350024e-19
4  52.0  78.215983  0.0  0.0118  0.164249  0.25   2.693530e-07  5.890810e-06
           K           S    q       r     sigma     t       price   fdm-price
12495  248.0  124.722561  0.0  0.1882  0.387418  1.00   83.589686   83.704518
12496  248.0  124.722561  0.0  0.1882  0.706686  0.25  112.709815  112.748824
12497  248.0  124.722561  0.0  0.1882  0.706686  0.50  105.733883  105.766608
12498  248.0  124.722561  0.0  0.1882  0.706686  0.75  100.556324  100.540536
12499  248.0  124.722561  0.0  0.1882  0.706686  1.00   96.170059   96.014092


# Extreme DataSet
A “extremes” dataset was similar to the full dataset. The only difference was that the spot prices, instead of being generated from the gamma distribution mentioned above, were instead distributed uniformly from 90 to 110. The purpose of this dataset is to test whether the neural network can generalize from this set of limited data to more “extreme” situations. This set also had almost a million observations. 

In [None]:
num_increment = 12
percentiles = pd.Series(np.linspace(0.01, 0.99, num_increment))
print(percentiles)

In [None]:
def this_extremes_S (q):
    return uniform.ppf(q, 90, 110)
S = percentiles.apply(this_extremes_S).to_numpy()
K = percentiles.apply(thisK).to_numpy()
q = percentiles.apply(thisD).to_numpy()
t = np.array([.25, .5, .75, 1])
r = percentiles.apply(thisR).to_numpy()
sigma = percentiles.apply(thisSigma).to_numpy()

param_grid = {'S': S, 'K' : K, 'q' : q, 't' : t, 'r' : r, 'sigma' : sigma}
grid = ParameterGrid(param_grid)

pbar = ProgressBar()
extremesDF = pd.DataFrame()
prices = []
fdmprices= []
tmp = []
for params in pbar(grid):
    prices.append(bsm.black_scholes_merton(flag = 'p', S = params['S'], K = params['K'], q = params['q'], t = params['t'],r = params['r'], sigma = params['sigma']))
    option = FDImplicitEu(S0 = params['S'], K = params['K'], r = params['r'], T = params['t'], sigma = params['sigma'], Smax = int(5*params["K"]), M = 100, N = 100, is_call=False)
    fdmprices.append(option.price())
    tmp.append(pd.Series(params).to_frame().T)
    pass
extremesDF = pd.concat(tmp, ignore_index=True)

In [None]:
# swap price to first column
extremesDF['price'] = prices
extremesDF['fdm-price'] = fdmprices

# output to csv
extremesDF.to_csv('dataExtremes.csv', index = True)
print(extremesDF.head())
print(extremesDF.tail())

# Neural Network 

In [None]:
# testing neural network (full data)
fullDF = pd.read_csv("dataFull.csv")

# create model 
def baseline_model():
    # layers
    i = Input(shape=(6,))
    x = Dense(10, activation='relu')(i)
    y = Dense(10, activation='relu')(x)
    o = Dense(1)(y)
    model = Model(i, o)
    model.compile(loss="mse", optimizer= "adam")
    return model

model_full = baseline_model()
X = fullDF[['S','K','q','r','sigma','t']]
y = fullDF[['price']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = 7)
history_full = model_full.fit(X_train, y_train, batch_size = 64, epochs = 20, verbose = 2, validation_split=0.2) # set batch size to 1, otherwise there are errors when trying to

plt.plot(history_full.history['val_loss'])
plt.title('Model validation loss')
plt.ylabel('Validation Loss')
plt.xlabel('Epoch')
plt.legend(['Error', 'Test'], loc='upper left')
plt.show()
X_test_full = X_test
y_test_full = y_test
model_full.evaluate(x=X_test, y=y_test)

In [None]:
# testing neural network (sparse data)
sparseDF = pd.read_csv("dataSparse.csv")

def baseline_model():
    # create model
    i = Input(shape=(6,))
    x = Dense(10, activation='relu')(i)
    y = Dense(10, activation='relu')(x)
    o = Dense(1)(y)
    model = Model(i, o)
    model.compile(loss="mse", optimizer= "adam")
    return model

model_sparse = baseline_model()
X = sparseDF[['S','K','q','r','sigma','t']]

y = sparseDF[['price']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = 7)
history_sparse = model_sparse.fit(X_train, y_train, batch_size = 64, epochs = 20, verbose = 2, validation_split=0.2) # set batch size to 1, otherwise there are errors when trying to

plt.plot(history_sparse.history['val_loss'])
plt.title('Model validation loss')
plt.ylabel('Validation Loss')
plt.xlabel('Epoch')
plt.legend(['Error', 'Test'], loc='upper left')
plt.show()

model_sparse.evaluate(x=X_test_full, y=y_test_full)

In [None]:
# testing neural network (extremes data)
extremesDF = pd.read_csv("dataExtremes.csv")

def baseline_model():
    # create model
    i = Input(shape=(6,))
    x = Dense(10, activation='relu')(i)
    y = Dense(10, activation='relu')(x)
    o = Dense(1)(y)
    model = Model(i, o)
    model.compile(loss="mse", optimizer= "adam")
    return model

model_extremes = baseline_model()
X = extremesDF[['S','K','q','r','sigma','t']]
y = extremesDF[['price']]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state = 7)
history_extremes = model_extremes.fit(X_train, y_train, batch_size = 64, epochs = 20, verbose = 2, validation_split=0.2) # set batch size to 1, otherwise there are errors when trying to

plt.plot(history_extremes.history['val_loss'])
plt.title('Model validation loss')
plt.ylabel('Validation Loss')
plt.xlabel('Epoch')
plt.legend(['Error', 'Test'], loc='upper left')
plt.show()

model_extremes.evaluate(x=X_test_full, y=y_test_full)

tableOutput = pd.DataFrame({'Full':history_full.history['val_loss'], \
'Sparse':history_sparse.history['val_loss'], \
'Extremes':history_extremes.history['val_loss']}, columns=['Full', 'Sparse', 'Extremes'])
tableOutput.to_csv("tableResultsValidaton.csv")

print(len(fullDF.index))
print(len(sparseDF.index))
print(len(extremesDF.index))