# 0 Data

In [383]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [384]:
from google.colab import drive

# Mount Google Drive so the processed data set can be read from it.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [385]:
# Historical option data; its columns are resampled later to build simulation inputs.
PROCESSED_DATA_PATH = '/content/drive/MyDrive/processed_data.csv'
real_data = pd.read_csv(PROCESSED_DATA_PATH)
real_data.head()

Unnamed: 0.1,Unnamed: 0,optionid,securityid,strike,date_traded,contract_price,underlyings_price,contract_volume,days_to_maturity,moneyness,rate,volatility,mean_volatility,reversion,var_of_vol,rho,lambda
0,0,30246878.0,504880.0,27.0,2003-04-24,0.365,23.108701,5109.0,0.405479,0.855878,0.012725,0.344999,0.027174,2.514547,0.136556,-0.332888,0.860253
1,1,30246882.0,504880.0,28.0,2003-04-24,0.225,23.108701,157.0,0.405479,0.825311,0.012725,0.344999,0.035583,1.412536,0.043306,-0.189034,0.83119
2,2,30246886.0,504880.0,29.0,2003-04-24,0.131,23.108701,122.0,0.405479,0.796852,0.012725,0.344999,0.016089,2.366108,0.081914,-0.164918,0.519741
3,3,30246890.0,504880.0,30.0,2003-04-24,0.074,23.108701,6020.0,0.405479,0.77029,0.012725,0.344999,0.00422,0.108479,0.546855,-0.167983,0.899078
4,4,30262356.0,504889.0,2.8,2003-04-24,0.056,2.4555,1000.0,0.405479,0.876964,0.012725,0.26561,0.010153,3.142642,0.640966,-0.430974,1.020605


In [386]:
dim  = 7                               # Dimension of basket (number of stocks)
num_samples = 50                            # Number of parameter sets (rows) drawn by generate_inputs_nn

In [395]:
def generate_inputs_nn(dim, num_samples, data=None):
    """
    Draws `num_samples` random parameter sets for the basket Heston pricer.

    Each returned row has 9 entries, in this order:
      0. underlying spot prices  (array of length `dim`, resampled from `data`)
      1. correlations            (array of length `dim`, uniform in (-0.75, -0.05])
      2. maturity                (resampled from data['days_to_maturity'])
      3. strike                  (resampled from data['strike'])
      4. initial variance        (uniform in [0.001, 0.051))
      5. long-term variance      (uniform in [0.001, 0.051))
      6. mean-reversion rate     (uniform in [0.01, 5.01))
      7. vol of vol              (uniform in [0.01, 0.71))
      8. rate                    (resampled from data['rate'])

    Args:
    - dim: int, the number of underlyings in the basket
    - num_samples: int, the number of parameter sets (rows) to draw
    - data: optional DataFrame providing the columns 'underlyings_price',
      'days_to_maturity', 'rate' and 'strike'. Defaults to the notebook-level
      `real_data` frame, which keeps existing two-argument calls working.

    Returns:
    - object ndarray of shape (num_samples, 9)
    """
    if data is None:
        # Fall back to the data set loaded at the top of the notebook.
        data = real_data

    def rand(num=num_samples):
        return np.random.random(num)

    def resample(column, n):
        # Bootstrap n values (with replacement) from one historical column.
        return np.array(data[column].sample(n=n, replace=True))

    # NOTE: the order of the random draws below matches the original
    # implementation, so a fixed np.random seed yields the same samples.
    maturity       = resample('days_to_maturity', num_samples)
    rate           = resample('rate', num_samples)
    strikes        = resample('strike', num_samples)
    rate_reversion = 0.01 + 5 * rand()            # kappa
    vol_of_vol     = 0.01 + 0.7 * rand()          # vol of vol
    long_term_var  = 0.001 + 0.05 * rand()        # theta
    initial_var    = 0.001 + 0.05 * rand()        # v0

    scalars = np.array([maturity, strikes, initial_var, long_term_var,
                        rate_reversion, vol_of_vol, rate], dtype=float).T

    # Pre-allocating the object array fixes the shape at (num_samples, 9),
    # including for num_samples == 0, instead of relying on numpy's
    # nested-sequence shape inference.
    inputs = np.empty((num_samples, 9), dtype=object)
    inputs[:, 2:] = scalars
    for i in range(num_samples):
        inputs[i, 0] = resample('underlyings_price', dim)   # spot prices
        inputs[i, 1] = -0.05 - 0.7 * rand(dim)              # correlations

    return inputs

In [394]:
# generate_inputs_nn needs both the basket size and the number of rows to draw;
# omitting num_samples raises a TypeError on a fresh kernel.
inputs_array = generate_inputs_nn(dim, num_samples)
inputs = pd.DataFrame(inputs_array, columns = ['underlyings_prices', 'correlations', 'maturity', 
                        'strike', 'initial_var','long_term_var','mean_reversion_rate', 'vol_of_var','r'])
inputs.head()

Unnamed: 0,underlyings_prices,correlations,maturity,strike,initial_var,long_term_var,mean_reversion_rate,vol_of_var,r
0,"[13.5375, 48.967001953125, 36.17110107421875, ...","[-0.3241979395618749, -0.2321104245306439, -0....",0.115068,76.0,0.032922,0.019908,2.597988,0.32842,0.024225
1,"[26.982099609375, 48.925, 5.370599975585938, 1...","[-0.5953097638933954, -0.45384822638079975, -0...",0.079452,63.5,0.049812,0.015425,3.686319,0.1414,0.004496
2,"[28.83919921875, 98.5926953125, 53.13189941406...","[-0.5171559983858124, -0.7379814096026234, -0....",0.252055,41.5,0.030222,0.001034,3.627484,0.180117,0.004135
3,"[4.0455999755859375, 18.932750244140625, 58.66...","[-0.5510596682114104, -0.057154264751898026, -...",0.046575,215.0,0.035548,0.017094,3.971818,0.017584,0.022361
4,"[3.3248001098632813, 206.15, 26.3610009765625,...","[-0.6876074065223343, -0.40780228482419945, -0...",0.717808,5.55,0.008076,0.011604,3.663596,0.487811,0.002377


# 1 Model for Scalar Values

In [389]:
def generate_heston_paths(S, T, r, kappa, theta, v_0, rho, xi, 
                          steps, num_sims, K=None):  
    '''
    Monte Carlo price of a call on the equally weighted basket average under
    a Heston-style simulation, for a single parameter set.

    Args:
    - S: sequence of length dim, initial spot price of each underlying
    - T: float, time to maturity
    - r: float, risk-free rate
    - kappa: float, mean-reversion rate of the variance
    - theta: float, long-term variance
    - v_0: float, initial variance (shared by all underlyings)
    - rho: scalar or sequence of length dim, correlation between each
      underlying's price shock and its variance shock
    - xi: float, volatility of the variance
    - steps: int, number of time steps
    - num_sims: int, number of simulated paths
    - K: float, strike. If omitted, the module-level name `K` is used, which
      is how earlier callers supplied the strike.

    Returns:
    - float, discounted average of max(mean_i S_i(T) - K, 0) over the paths

    Raises:
    - NameError: if K is not given and no module-level `K` exists
    '''
    if K is None:
        # Backward compatibility: the strike used to be read from a global.
        if 'K' not in globals():
            raise NameError("strike K was not passed and no global 'K' is defined")
        K = globals()['K']

    # Coerce to float arrays so list inputs work with the arithmetic below.
    S = np.atleast_1d(np.asarray(S, dtype=float))
    rho = np.asarray(rho, dtype=float)

    dt = T/steps
    dim = len(S)
    S_t = np.tile(S, (num_sims, 1))                     # (num_sims, dim)
    v_t = np.full((num_sims, 1), v_0, dtype=float)      # broadcasts over dim
    rho_perp = np.sqrt(1 - rho**2)                      # loop-invariant

    for _ in range(steps):
        # Price shocks are drawn independently for every underlying.
        WT1 = np.random.normal(0, 1, size=(num_sims, dim))
        WT2 = np.random.normal(0, 1, size=(num_sims, dim))
        # Variance shock correlated with the price shock through rho.
        WT3 = rho * WT1 + rho_perp * WT2

        # Floor the variance at zero so the square root below is defined.
        v_t = np.maximum(v_t, 0)
        vol_step = np.sqrt(v_t * dt)
        S_t = S_t * np.exp((r - 0.5*v_t)*dt + vol_step * WT1)
        v_t = v_t + kappa*(theta - v_t)*dt + xi*vol_step*WT3

    # Basket value per path = plain average of the terminal prices.
    basket = np.mean(S_t, axis=1)
    S_call = np.exp(-1 * r * T) * np.sum(np.maximum(basket - K, 0)) / num_sims

    return S_call

# 2 Model for Vector Values

In [390]:
def generate_heston_paths_vec(df, steps=1000, num_sims=100000):  
    '''
    Monte Carlo basket-call prices for multiple Heston parameter sets at once.

    Every row of `df` is one parameter set; all rows are simulated together
    with arrays of shape (num_sims, dim, N), where N = len(df) and dim is the
    basket size.

    Args:
    - df: DataFrame with the columns
        'underlyings_prices'  (sequence of length dim per row),
        'correlations'        (sequence of length dim per row),
        'maturity', 'strike', 'initial_var', 'long_term_var',
        'mean_reversion_rate', 'vol_of_var', 'r' (one number per row).
      All rows must share the same dim.
    - steps: int, number of time steps
    - num_sims: int, number of simulated paths per row

    Returns:
    - float ndarray of shape (N,), one discounted call price per row
      (an empty array when `df` has no rows)
    '''
    N = len(df)
    if N == 0:
        # Nothing to price; avoids indexing the first row of an empty frame.
        return np.zeros(0)

    def as_float(column):
        # Columns may have object dtype; cast once, outside the time loop.
        return df[column].values.astype('float')

    T     = as_float('maturity')
    dt    = T / steps
    v_0   = as_float('initial_var')
    r     = as_float('r')
    theta = as_float('long_term_var')
    kappa = as_float('mean_reversion_rate')
    xi    = as_float('vol_of_var')
    K     = as_float('strike')[:, np.newaxis]                          # (N, 1)

    spots = np.stack([np.asarray(s, dtype=float)
                      for s in df['underlyings_prices'].values])      # (N, dim)
    rho   = np.stack([np.asarray(c, dtype=float)
                      for c in df['correlations'].values])            # (N, dim)

    dim = spots.shape[1]
    S_t = np.tile(spots.T, (num_sims, 1, 1))            # (num_sims, dim, N)
    v_t = v_0 + np.zeros((num_sims, 1, 1))              # (num_sims, 1, N)

    # Loop-invariant pieces of the correlated variance shock.
    rho_T = rho.T                                       # (dim, N)
    rho_perp = np.sqrt(1 - rho_T**2)

    for _ in range(steps):
        WT1 = np.random.normal(0, 1, size=(num_sims, dim, N))
        WT2 = np.random.normal(0, 1, size=(num_sims, dim, N))
        WT3 = rho_T * WT1 + rho_perp * WT2

        # Floor the variance at zero so the square root below is defined.
        v_t = np.maximum(v_t, 0)
        vol_step = np.sqrt(v_t * dt)
        S_t = S_t * np.exp((r - 0.5*v_t)*dt + vol_step * WT1)
        v_t = v_t + kappa*(theta - v_t)*dt + xi*vol_step*WT3

    # Average over the basket, then arrange as (N, num_sims) to match K.
    basket = np.mean(S_t, axis=1).T
    S_call = np.exp(-1 * r * T) * np.sum(np.maximum(basket - K, 0), axis=1) / num_sims

    return S_call

# 3 Test implementation

Test the scalar case.

In [391]:
# Take every model parameter from the first sampled parameter set.
first_row = inputs.iloc[0]

S_0   = first_row['underlyings_prices']
v_0   = first_row['initial_var']
r     = first_row['r']
theta = first_row['long_term_var']
kappa = first_row['mean_reversion_rate']
xi    = first_row['vol_of_var']
K     = first_row['strike']        # the scalar pricer reads the strike from this global
rho   = first_row['correlations']
T     = first_row['maturity']

generate_heston_paths(S_0, T, r, kappa, theta, v_0, rho, xi,
                      steps=100, num_sims=1000)

31.454480635384787

Test the vector case.

In [392]:
# Price every sampled parameter set in one vectorised run.
vec_prices = generate_heston_paths_vec(inputs, steps=100, num_sims=1000)
vec_prices

array([3.15097504e+01, 0.00000000e+00, 7.01865788e-03, 4.30968196e+00,
       4.32209113e+01, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       9.02865615e+01, 7.54711489e+01, 2.37761493e+01, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 3.47905365e+01, 1.97614938e+01,
       6.61765795e+01, 4.68403319e+01, 0.00000000e+00, 1.62771500e-02,
       0.00000000e+00, 0.00000000e+00, 7.28868760e+01, 4.57586577e+01,
       6.11409486e+01, 0.00000000e+00, 0.00000000e+00, 1.02508709e+01,
       0.00000000e+00, 7.05058757e+01, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 3.40904335e+01, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 6.04270611e+01, 1.20223853e+01,
       0.00000000e+00, 5.62211407e+01, 7.97904712e+00, 0.00000000e+00,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 2.20260885e+01,
       0.00000000e+00, 2.75572507e+01])

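We can see that the first element returned by `generate_heston_paths_vec` (≈ 31.51) is approximately equal to the value returned by `generate_heston_paths` (≈ 31.45); the small gap is Monte Carlo noise, since the two runs use different random draws. This indicates that the scalar and vectorised implementations are consistent with each other.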