# 0 Data

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [3]:
from google.colab import drive
# Mount Google Drive (Colab only) so the CSV under /content/drive/MyDrive can be read below.
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# Historical option data; the sampling helpers defined later draw spots, strikes,
# maturities and rates from the columns of this frame.
real_data = pd.read_csv('/content/drive/MyDrive/processed_data.csv')
# Preview the first rows as a sanity check.
real_data.head()

Unnamed: 0.1,Unnamed: 0,optionid,securityid,strike,date_traded,contract_price,underlyings_price,contract_volume,days_to_maturity,moneyness,rate,volatility,mean_volatility,reversion,var_of_vol,rho,lambda
0,0,30246878.0,504880.0,27.0,2003-04-24,0.365,23.108701,5109.0,0.405479,0.855878,0.012725,0.344999,0.027174,2.514547,0.136556,-0.332888,0.860253
1,1,30246882.0,504880.0,28.0,2003-04-24,0.225,23.108701,157.0,0.405479,0.825311,0.012725,0.344999,0.035583,1.412536,0.043306,-0.189034,0.83119
2,2,30246886.0,504880.0,29.0,2003-04-24,0.131,23.108701,122.0,0.405479,0.796852,0.012725,0.344999,0.016089,2.366108,0.081914,-0.164918,0.519741
3,3,30246890.0,504880.0,30.0,2003-04-24,0.074,23.108701,6020.0,0.405479,0.77029,0.012725,0.344999,0.00422,0.108479,0.546855,-0.167983,0.899078
4,4,30262356.0,504889.0,2.8,2003-04-24,0.056,2.4555,1000.0,0.405479,0.876964,0.012725,0.26561,0.010153,3.142642,0.640966,-0.430974,1.020605


In [5]:
dim  = 7                               # Dimension of basket (number of underlying stocks)
num_samples = 50                            # Number of parameter sets (rows) drawn by generate_inputs_nn; also the default size of rand().

In [6]:
def rand(num = num_samples):
    """
    Draw `num` independent uniform samples on [0, 1).

    The default for `num` is the value `num_samples` had when this
    function was defined.
    """
    uniform_draws = np.random.random_sample(size=num)
    return uniform_draws

def corr(dim):
    """
    Draw `dim` correlation values, each uniform on (-0.75, -0.05].
    """
    uniform_draws = rand(dim)
    return -0.05 - 0.7 * uniform_draws

def spots(dim):
    """
    Draw `dim` initial spot prices by resampling (with replacement) the
    'underlyings_price' column of the historical data in `real_data`.
    """
    sampled_prices = real_data['underlyings_price'].sample(n=dim, replace=True)
    return np.array(sampled_prices)

def generate_inputs_nn(dim):
    """
    Build `num_samples` random parameter sets for the basket pricer.

    Each row holds 7 scalar features plus two length-`dim` vectors
    (7 + 2*dim numbers in total), laid out as
    [spots, correlations, maturity, strike, initial_var, long_term_var,
     rate_reversion, vol_of_vol, rate].

    Arg:
    - dim: int, the number of underlyings in the basket

    Returns:
    - object ndarray with one row per sample; the first two entries of a
      row are numpy arrays of length `dim`, the rest are floats.
    """
    def _resample(column):
        # Bootstrap num_samples values from one column of the historical data.
        return np.array(real_data[column].sample(n = num_samples, replace = True))

    # Market-like quantities are resampled from the historical data.
    maturity = _resample('days_to_maturity')
    rate = _resample('rate')
    strikes = _resample('strike')

    # Model parameters are drawn uniformly from fixed ranges.
    rate_reversion = 0.01 + 5 * rand()       # kappa, mean-reversion speed
    vol_of_vol = 0.01 + 0.7 * rand()         # volatility of the variance process
    long_term_var = 0.001 + 0.05 * rand()    # theta, long-run variance
    initial_var = 0.001 + 0.05 * rand()      # v0, starting variance

    scalar_rows = np.array([maturity, strikes, initial_var, long_term_var,
                            rate_reversion, vol_of_vol, rate]).T.tolist()

    rows = []
    for scalars in scalar_rows:
        # Spots are drawn before correlations for every row.
        basket_spots = spots(dim)
        basket_corr = corr(dim)
        rows.append([basket_spots, basket_corr] + scalars)

    return np.array(rows, dtype = object)

In [7]:
# Draw one batch of random parameter sets for a `dim`-asset basket (object array, one row per sample).
inputs_array = generate_inputs_nn(dim)
inputs_array

array([[array([ 45.13299805,  41.58509766, 206.07720703,   1.21790001,
                95.63360352,  33.24830078,  15.22425049])             ,
        array([-0.73628601, -0.45338665, -0.26842794, -0.64370111, -0.17822197,
               -0.19585435, -0.66656565])                                      ,
        0.1561643835616438, 175.0, 0.026587220478024788,
        0.02448624359425354, 0.581926640212298, 0.5116229154023298,
        0.0009616139919478],
       [array([  4.31279999,  14.23594971,  73.05859863,  71.885     ,
                30.76629883,  96.32519531, 130.01240234])             ,
        array([-0.56946201, -0.53804231, -0.25894513, -0.67622973, -0.45232687,
               -0.66467451, -0.65699476])                                      ,
        0.1232876712328767, 5.3, 0.006878728965296615,
        0.03478878865713935, 3.84133551004859, 0.06316024281444918,
        0.0110578610185397],
       [array([ 1.29779999, 70.29850098, 86.10429688, 39.95169922, 53.19259766,
      

In [182]:
# Label the columns of the sampled parameter sets. The order must match the per-row
# layout produced by generate_inputs_nn: spots, correlations, then the seven scalars.
inputs = pd.DataFrame(inputs_array, columns = ['underlyings_prices', 'correlations', 'maturity', 
                        'strike', 'initial_var','long_term_var','mean_reversion_rate', 'vol_of_var','r'])
inputs.head()

Unnamed: 0,underlyings_prices,correlations,maturity,strike,initial_var,long_term_var,mean_reversion_rate,vol_of_var,r
0,"[45.132998046875, 41.58509765625, 206.07720703...","[-0.7362860105131611, -0.4533866471248037, -0....",0.156164,175.0,0.026587,0.024486,0.581927,0.511623,0.000962
1,"[4.312799987792968, 14.23594970703125, 73.0585...","[-0.5694620109458479, -0.5380423133379794, -0....",0.123288,5.3,0.006879,0.034789,3.841336,0.06316,0.011058
2,"[1.297799987792969, 70.2985009765625, 86.10429...","[-0.18235464133525348, -0.13146845876865476, -...",0.145205,12.8,0.002635,0.032407,0.641851,0.340774,0.001483
3,"[87.555703125, 4.936900024414062, 34.524499511...","[-0.5702806174108089, -0.08399332936332468, -0...",0.161644,27.75,0.019996,0.048235,2.125244,0.429227,0.002148
4,"[6.643400268554688, 130.35240234375, 36.071899...","[-0.3763736264439012, -0.36679923610007953, -0...",0.372603,124.5,0.046104,0.044113,4.046311,0.405051,0.053075


# 1 Model for Scalar Values

In [8]:
def generate_heston_paths(S, T, r, kappa, theta, v_0, rho, xi, 
                          steps, num_sims, strike=None):  
    '''
    Monte Carlo price of a European call on an equally weighted basket
    under the Heston model, for a single parameter set.

    Every asset gets its own variance process. The asset and variance
    shocks of one asset are correlated through `rho`; shocks of different
    assets are drawn independently. Negative variances are truncated to
    zero before each step.

    Args:
    - S: sequence of length dim, initial spot price of each underlying
    - T: float, time to maturity
    - r: float, risk-free rate
    - kappa: float, mean-reversion speed of the variance
    - theta: float, long-run variance
    - v_0: float, initial variance
    - rho: float or sequence of length dim, asset/variance correlation
    - xi: float, volatility of the variance process
    - steps: int, number of time steps (> 0)
    - num_sims: int, number of simulated paths (> 0)
    - strike: float, strike of the basket call. When omitted, the
      module-level `K` is used, which is how this function originally
      obtained the strike.

    Returns:
    - float, discounted average payoff max(mean_i S_i(T) - strike, 0)

    Raises:
    - ValueError if `steps` or `num_sims` is not positive
    '''
    if steps <= 0 or num_sims <= 0:
        raise ValueError("steps and num_sims must be positive")
    if strike is None:
        # Backward compatibility: fall back to the global strike `K`.
        strike = K

    S = np.asarray(S, dtype=float)
    rho = np.asarray(rho, dtype=float)
    dt = T/steps
    dim = len(S)
    S_t = np.repeat([S], num_sims, axis=0)          # (num_sims, dim)
    v_t = v_0 + np.zeros(num_sims)[:, np.newaxis]   # (num_sims, 1), broadcasts over assets

    for _ in range(steps):
        WT1 = np.random.normal(0, 1, size=(num_sims, dim))
        WT2 = np.random.normal(0, 1, size=(num_sims, dim))
        # Variance shock correlated with the asset shock by rho.
        WT3 = rho * WT1 + np.sqrt(1 - rho**2) * WT2

        # Full truncation: keep the variance non-negative before it is used.
        v_t = np.maximum(v_t, 0)
        S_t = S_t * (np.exp((r - 0.5*v_t)*dt + np.sqrt(v_t * dt) * WT1))
        v_t = v_t + kappa*(theta - v_t)*dt + xi*np.sqrt(v_t * dt)*WT3

    # Basket value per path = average of the terminal asset prices.
    basket = np.mean(S_t, axis=1)
    S_call = np.exp(-1 * r * T) * np.sum(np.maximum(basket - strike, 0)) / num_sims

    return S_call

# 2 Model for Vector Values

In [171]:
def generate_heston_paths_vec(df, steps=1000, num_sims=100000):  
    '''
    Monte Carlo prices of European basket calls under the Heston model
    for multiple parameter sets at once (one per row of `df`).

    Args:
    - df: DataFrame with one row per contract and the columns
      'underlyings_prices' (length-dim array per row), 'correlations'
      (length-dim array per row), 'maturity', 'strike', 'initial_var',
      'long_term_var', 'mean_reversion_rate', 'vol_of_var' and 'r'.
      All rows must share the same basket dimension.
    - steps: int, number of time steps (> 0)
    - num_sims: int, number of simulated paths per contract (> 0)

    Returns:
    - ndarray of shape (len(df),): the discounted average payoff
      max(mean_i S_i(T) - strike, 0) of each contract.

    Raises:
    - ValueError if `steps` or `num_sims` is not positive

    Note: memory use scales with num_sims * dim * len(df); the defaults
    are expensive for large frames.
    '''
    if steps <= 0 or num_sims <= 0:
        raise ValueError("steps and num_sims must be positive")

    N = len(df)
    if N == 0:
        return np.zeros(0)

    # Scalar parameters, one value per contract -> shape (N,).
    T     = df['maturity'].values.astype('float')
    v_0   = df['initial_var'].values.astype('float')
    r     = df['r'].values.astype('float')
    theta = df['long_term_var'].values.astype('float')
    kappa = df['mean_reversion_rate'].values.astype('float')
    xi    = df['vol_of_var'].values.astype('float')
    K     = df['strike'].values.astype('float')
    dt    = T / steps

    # Per-asset parameters are stored as one array per row; stack them into
    # dense float arrays of shape (dim, N) so they broadcast against the
    # (num_sims, dim, N) simulation arrays.
    S_0 = np.stack([np.asarray(s, dtype='float') for s in df['underlyings_prices'].values], axis=-1)
    rho = np.stack([np.asarray(c, dtype='float') for c in df['correlations'].values], axis=-1)
    rho_comp = np.sqrt(1 - rho**2)   # loop-invariant

    dim = S_0.shape[0]
    S_t = np.repeat(S_0[np.newaxis], num_sims, axis=0)   # (num_sims, dim, N)
    v_t = v_0 + np.zeros((num_sims, 1, 1))               # (num_sims, 1, N)

    for _ in range(steps):
        WT1 = np.random.normal(0, 1, size=(num_sims, dim, N))
        WT2 = np.random.normal(0, 1, size=(num_sims, dim, N))
        # Variance shock correlated with the asset shock by rho.
        WT3 = rho * WT1 + rho_comp * WT2

        # Full truncation: keep the variance non-negative before it is used.
        v_t = np.maximum(v_t, 0)
        vol_step = np.sqrt(v_t * dt)
        S_t = S_t * np.exp((r - 0.5*v_t)*dt + vol_step * WT1)
        v_t = v_t + kappa*(theta - v_t)*dt + xi*vol_step*WT3

    # Basket value per path and contract -> (num_sims, N).
    basket = np.mean(S_t, axis=1)
    # Average the payoff over simulations only, so every contract keeps its own price.
    S_call = np.exp(-1 * r * T) * np.sum(np.maximum(basket - K, 0), axis=0) / num_sims

    return S_call

# 3 Test implementation

Test the scalar case.

In [42]:
# Pull the first parameter set (row 0 of `inputs`) into module-level variables
# for the single-run pricer.
S_0   = inputs['underlyings_prices'].values[0] 
v_0   = inputs['initial_var'].values[0] 
r     = inputs['r'].values[0] 
theta = inputs['long_term_var'].values[0] 
kappa = inputs['mean_reversion_rate'].values[0] 
xi    = inputs['vol_of_var'].values[0] 
# K is not passed in the generate_heston_paths call below; the function reads it from module scope.
K     = inputs['strike'].values[0] 
rho   = inputs['correlations'].values[0] 
T     = inputs['maturity'].values[0] 

In [None]:
# Smoke test with 100 time steps and only 10 paths; no random seed is set,
# so the printed price changes from run to run.
generate_heston_paths(S_0, T, r, kappa, theta, v_0, rho, xi, 
                          100, 10)

(10,)


18.315317082595126