In [1]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt 
from pyDOE import *
import random
from scipy.stats.distributions import uniform

In [2]:
#############################################################
###BLACK-SCHOLES FORMULA(output V/K : scaled option price)###
#############################################################

###d-functions for BS-formula
## mns denotes moneyness

from scipy.stats.distributions import norm

def d1(mns, tau, r, sigma):
    return((np.log(mns)+(r+sigma**2/2)*tau)/((sigma * np.sqrt(tau))))

def d2(mns, tau, r, sigma):
    return((np.log(mns)+(r-sigma**2/2)*tau)/((sigma * np.sqrt(tau))))
    

def BSscaled(mns, tau, r, sigma):
    return(mns * norm.cdf(d1(mns, tau, r, sigma))
    - np.exp(-r*tau) * norm.cdf(d2(mns, tau, r, sigma)))    


############################################################################################
############################DEFINITION OF THE LHS METHOD ###################################
############################################################################################

############################################################################################
############## Construction of BS-data generation function #################################
#Inputs: num: number of samples, xlimits : range of input variables, index : "IV" or "BS" ##
#outputs: 2d array for input variables in the ANN and 1d array for the output variable #####
############################################################################################
def BSdata(num, xlimits, index):
    
    
    np.random.seed(64)
    X = lhs(4, samples = num)
    for i in range(4):
        X[:,i] = uniform(loc=xlimits[i][0], scale=xlimits[i][1]).ppf(X[:,i])
    
    mns   = X[:,0]
    tau   = X[:,1]
    r     = X[:,2]
    sigma = X[:,3]
    Y = BSscaled(mns, tau, r, sigma)
    
    if index == "IV":
        y      = X[:,3]
        X[:,3] = Y
        return( X, y)
    else:
        y = Y
        return(X, y)
        
    

In [3]:
###We produce 3 data sets for each case. 1st one has 500.000 data points and is used for model selection and hyperparameter 
###optimization , 2nd is used for intensive training of the optimized(best ranked) models and has 2 million data points, 
###and the last one is a test set containing 200.000 data points , which is used to measure the performance of the trained 
###models in new observations. *

In [21]:
index = 'IV'
xlimits = xlimits = np.array([[0.5, 1.4-0.5], [0.05, 1.0-0.05], [0.000, 0.1-0.0], [0.05, 1.00-0.05]])
num = 100000
Dataset = BSdata(num, xlimits, index)

In [22]:
pd1 = pd.DataFrame(Dataset[0], columns = ('mns', 'tau', 'r', 'sigma'))
pd1['V/K'] = Dataset[1]
pd1

Unnamed: 0,mns,tau,r,sigma,V/K
0,1.356256,0.912861,0.024530,4.335566e-01,4.335566e-01
1,0.913612,0.662823,0.065802,1.042996e-01,1.042996e-01
2,0.665896,0.537055,0.076402,1.557488e-03,1.557488e-03
3,0.505874,0.123963,0.053268,7.439032e-05,7.439032e-05
4,0.914621,0.085809,0.031681,6.037048e-02,6.037048e-02
...,...,...,...,...,...
99995,0.901346,0.170205,0.005573,7.073847e-02,7.073847e-02
99996,0.701874,0.921198,0.081548,1.506883e-03,1.506883e-03
99997,1.109584,0.590433,0.049139,2.129696e-01,2.129696e-01
99998,0.561105,0.066644,0.098482,7.478513e-100,7.478513e-100


In [23]:
min(pd1['V/K']),max(pd1['V/K'])

(0.0, 0.7080369164698711)

In [24]:
pd1.to_csv (r'C:\Users\Γιώργος\Desktop\THESIS\PYTHON\DATA\IvHyperparameter.csv',index = False)

In [25]:
pd_BS = pd.read_csv(r'C:\Users\Γιώργος\Desktop\THESIS\PYTHON\DATA\IvHyperparameter.csv')

In [26]:
pd_BS

Unnamed: 0,mns,tau,r,sigma,V/K
0,1.356256,0.912861,0.024530,4.335566e-01,4.335566e-01
1,0.913612,0.662823,0.065802,1.042996e-01,1.042996e-01
2,0.665896,0.537055,0.076402,1.557488e-03,1.557488e-03
3,0.505874,0.123963,0.053268,7.439032e-05,7.439032e-05
4,0.914621,0.085809,0.031681,6.037048e-02,6.037048e-02
...,...,...,...,...,...
99995,0.901346,0.170205,0.005573,7.073847e-02,7.073847e-02
99996,0.701874,0.921198,0.081548,1.506883e-03,1.506883e-03
99997,1.109584,0.590433,0.049139,2.129696e-01,2.129696e-01
99998,0.561105,0.066644,0.098482,7.478513e-100,7.478513e-100
