# 0 Data

In [396]:
import pandas as pd
import numpy as np
import tensorflow as tf

In [397]:
# Mount Google Drive inside the Colab runtime at /content/drive so that the
# data file read in the next cell is reachable. This only works on Colab.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [398]:
# Load the processed option data from the mounted Drive. `real_data` is the
# pool that generate_inputs_nn samples spot prices, maturities, rates and
# strikes from.
real_data = pd.read_csv('/content/drive/MyDrive/processed_data.csv')
# Preview the first rows as the cell output.
real_data.head()

Unnamed: 0.1,Unnamed: 0,optionid,securityid,strike,date_traded,contract_price,underlyings_price,contract_volume,days_to_maturity,moneyness,rate,volatility,mean_volatility,reversion,var_of_vol,rho,lambda
0,0,30246878.0,504880.0,27.0,2003-04-24,0.365,23.108701,5109.0,0.405479,0.855878,0.012725,0.344999,0.027174,2.514547,0.136556,-0.332888,0.860253
1,1,30246882.0,504880.0,28.0,2003-04-24,0.225,23.108701,157.0,0.405479,0.825311,0.012725,0.344999,0.035583,1.412536,0.043306,-0.189034,0.83119
2,2,30246886.0,504880.0,29.0,2003-04-24,0.131,23.108701,122.0,0.405479,0.796852,0.012725,0.344999,0.016089,2.366108,0.081914,-0.164918,0.519741
3,3,30246890.0,504880.0,30.0,2003-04-24,0.074,23.108701,6020.0,0.405479,0.77029,0.012725,0.344999,0.00422,0.108479,0.546855,-0.167983,0.899078
4,4,30262356.0,504889.0,2.8,2003-04-24,0.056,2.4555,1000.0,0.405479,0.876964,0.012725,0.26561,0.010153,3.142642,0.640966,-0.430974,1.020605


In [400]:
def generate_inputs_nn(dim, num_samples, data=None):
    """
    Returns num_samples numbers of 7 + 2*dim input features
    which is later fed into the neural networks.

    Args:
    - dim: int, the number of underlyings in the basket
    - num_samples: int, number of samples to be generated
    - data: optional DataFrame to bootstrap from; it must provide the columns
      'underlyings_price', 'days_to_maturity', 'rate' and 'strike'. When
      omitted, the notebook-level `real_data` frame is used.

    Returns:
    - np.ndarray of dtype object with shape (num_samples, 9). Per row, in
      order: spot prices (array of length dim), correlations (array of length
      dim), maturity, strike, initial variance, long-term variance,
      mean-reversion rate, vol-of-vol, rate.
    """
    if data is None:
        # Backward compatible default: fall back to the module-level frame.
        data = real_data

    def rand(num=num_samples):
        # Uniform draws on [0, 1).
        return np.random.random(num)

    def corr(dim):
        # One correlation per underlying, uniform on (-0.75, -0.05].
        return -0.05 - 0.7 * rand(dim)

    def spots(dim):
        # Initial spot price of each underlying, bootstrapped from the data.
        return np.array(data['underlyings_price'].sample(n=dim, replace=True))

    # Market quantities are resampled (with replacement) from the data.
    maturity = np.array(data['days_to_maturity'].sample(n=num_samples, replace=True))
    rate = np.array(data['rate'].sample(n=num_samples, replace=True))
    strikes = np.array(data['strike'].sample(n=num_samples, replace=True))

    # Heston parameters are drawn uniformly from fixed ranges.
    rate_reversion = 0.01 + 5 * rand()       # kappa
    vol_of_vol = 0.01 + 0.7 * rand()         # lambda
    long_term_var = 0.001 + 0.05 * rand()    # theta
    initial_var = 0.001 + 0.05 * rand()      # v0 (the simulators take its square root, i.e. it is a variance)

    # Preallocate an object array so that the per-row spot/correlation vectors
    # are stored as single cells; this avoids relying on ragged-list inference
    # and yields a well-formed (0, 9) array when num_samples == 0.
    inputs = np.empty((num_samples, 9), dtype=object)
    scalar_columns = [maturity, strikes, initial_var, long_term_var,
                      rate_reversion, vol_of_vol, rate]
    for col_idx, values in enumerate(scalar_columns, start=2):
        inputs[:, col_idx] = values

    for i in range(num_samples):
        inputs[i, 0] = spots(dim)
        inputs[i, 1] = corr(dim)

    return inputs

In [402]:
# Size of each basket and how many synthetic parameter sets to draw.
dim = 7            # number of stocks in the basket
num_samples = 50   # number of generated rows

inputs_array = generate_inputs_nn(dim, num_samples)

# Column labels follow the positional order of the rows returned by
# generate_inputs_nn.
inputs = pd.DataFrame(
    inputs_array,
    columns=[
        'underlyings_price',
        'rho',
        'days_to_maturity',
        'strike',
        'volatility',
        'mean_volatility',
        'reversion',
        'vol_of_var',
        'rate',
    ],
)
inputs.head()

Unnamed: 0,underlyings_price,rho,days_to_maturity,strike,volatility,mean_volatility,reversion,vol_of_var,rate
0,"[14.750250244140624, 31.99110107421875, 2.6302...","[-0.6035891255314515, -0.4435256870801581, -0....",0.158904,3.8,0.02461,0.027264,3.58713,0.051465,0.003195
1,"[32.9052001953125, 63.046298828125, 52.6589990...","[-0.6003797340669739, -0.6982510198841568, -0....",0.084932,4.64,0.033541,0.005324,2.765051,0.013205,0.009192
2,"[33.4027001953125, 3.3667999267578126, 130.595...","[-0.14624770174742077, -0.49320265939487007, -...",0.068493,69.75,0.042141,0.007901,1.366419,0.462005,0.001994
3,"[43.7925, 220.21359375, 13.4925, 57.8349023437...","[-0.2030593641861862, -0.3634292228820676, -0....",0.10137,34.5,0.035722,0.050095,4.637565,0.255585,0.001348
4,"[16.101500244140624, 112.103095703125, 28.1962...","[-0.3623450506847152, -0.6299581084159465, -0....",0.20274,5.12,0.04434,0.01585,2.541071,0.659078,0.002694


# 1 Model for Scalar Values

In [403]:
def generate_heston_paths(S, T, r, kappa, theta, v_0, rho, xi, 
                          steps, num_sims, K=None):  
    '''
    Produces result for a single heston run: the Monte Carlo price of a
    European call on the equally weighted average of the basket.

    Args:
    - S: np.array, contains spot prices of the underlying assets in the basket
    - T: float, days to maturity divided by 365
    - r: float, the risk-free rate
    - kappa: float, mean reversion rate of variance
    - theta: float, long-term average price variance
    - v_0: float, initial variance (its square root enters the diffusion term)
    - rho: np.array, contains correlations between each random underlying return process and its associated random volatility process  
    - xi: float, volatility of variance
    - steps: int, num time steps
    - num_sims: int, no. of simulations for each sample 
    - K: float, strike of the basket call. If omitted, the module-level
      variable `K` is used, which is how this function originally obtained
      the strike.

    Returns:
    - float, discounted average payoff max(mean(S_T) - K, 0) over num_sims paths

    Raises:
    - NameError: if K is not passed and no module-level `K` exists
    '''
    if K is None:
        # Backward compatibility: earlier callers set a global K before calling.
        try:
            K = globals()['K']
        except KeyError:
            raise NameError(
                "name 'K' is not defined; pass the strike via the K argument"
            ) from None

    S = np.asarray(S, dtype=float)
    rho = np.asarray(rho, dtype=float)

    dt = T / steps
    dim = len(S)
    S_t = np.repeat([S], num_sims, axis=0)         # (num_sims, dim)
    v_t = v_0 + np.zeros(num_sims)[:, np.newaxis]  # (num_sims, 1), broadcasts to dim
    rho_perp = np.sqrt(1 - rho**2)                 # loop-invariant

    for _ in range(steps):
        WT1 = np.random.normal(0, 1, size=(num_sims, dim))
        WT2 = np.random.normal(0, 1, size=(num_sims, dim))
        # Variance shock correlated with the asset shock through rho.
        WT3 = rho * WT1 + rho_perp * WT2

        # Truncate negative variance before taking the square root.
        v_t = np.maximum(v_t, 0)
        vol_step = np.sqrt(v_t * dt)
        S_t = S_t * np.exp((r - 0.5 * v_t) * dt + vol_step * WT1)
        v_t = v_t + kappa * (theta - v_t) * dt + xi * vol_step * WT3

    # Basket value per path is the plain average of the terminal asset prices.
    basket = np.mean(S_t, axis=1)
    S_call = np.exp(-1 * r * T) * np.sum(np.maximum(basket - K, 0)) / num_sims

    return S_call

# 2 Model for Vector Values

In [404]:
def generate_heston_paths_vec(df, steps=1000, num_sims=100000):  
    '''
    Produces result for multiple heston runs for call options only.

    Args:  
        - df: dataframe, containing all parameters, one basket per row.
          Columns read: 'underlyings_price' and 'rho' (one sequence per row,
          all rows of equal length), 'days_to_maturity', 'strike',
          'volatility' (used as initial variance), 'mean_volatility',
          'reversion', 'vol_of_var', 'rate'.
        - steps: int, num time steps
        - num_sims: int, no. of simulations for each sample  

    Output:  
        - result: ndarray of shape (len(df),), containing average discounted
          call payoffs over num_sims; empty when df has no rows
    '''  
    N = len(df)
    if N == 0:
        # Nothing to price; avoids indexing the first row of an empty frame.
        return np.zeros(0)

    def column(name):
        # Scalar parameter columns may be stored as object dtype; cast once.
        return df[name].values.astype('float')

    T     = column('days_to_maturity')
    dt    = T / steps
    v_0   = column('volatility')
    r     = column('rate')
    theta = column('mean_volatility')
    kappa = column('reversion')
    xi    = column('vol_of_var')
    K     = column('strike')[:, np.newaxis]                      # (N, 1)

    # Per-row vectors -> dense (N, dim) float arrays (lists or ndarrays accepted).
    S_0 = np.array([np.asarray(s, dtype='float64') for s in df['underlyings_price'].values])
    rho = np.array([np.asarray(x, dtype='float64') for x in df['rho'].values])

    dim = S_0.shape[1]
    rho_t = rho.T                                                # (dim, N)
    rho_perp = np.sqrt(1 - rho_t**2)                             # loop-invariant

    # Simulation state is laid out as (num_sims, dim, N) so that the
    # per-sample parameter vectors of length N broadcast on the last axis.
    S_t = np.tile(S_0.T, (num_sims, 1, 1))                       # (num_sims, dim, N)
    v_t = v_0 + np.zeros((num_sims, 1, 1))                       # (num_sims, 1, N)

    for _ in range(steps):
        WT1 = np.random.normal(0, 1, size=(num_sims, dim, N))
        WT2 = np.random.normal(0, 1, size=(num_sims, dim, N))
        # Variance shock correlated with the asset shock through rho.
        WT3 = rho_t * WT1 + rho_perp * WT2

        # Truncate negative variance before taking the square root.
        v_t = np.maximum(v_t, 0)
        vol_step = np.sqrt(v_t * dt)
        S_t = S_t * np.exp((r - 0.5 * v_t) * dt + vol_step * WT1)
        v_t = v_t + kappa * (theta - v_t) * dt + xi * vol_step * WT3

    # Average over the basket, then over simulations, per sample.
    S_t = S_t.transpose(2, 0, 1)                                 # (N, num_sims, dim)
    basket = np.mean(S_t, axis=2)                                # (N, num_sims)
    S_call = np.exp(-1 * r * T) * np.sum(np.maximum(basket - K, 0), axis=1) / num_sims

    return S_call

# 3 Test implementation

Test the scalar case.

In [405]:
# Unpack the parameters of the first generated sample. The names are kept at
# module level on purpose: generate_heston_paths looks up the strike `K` from
# the notebook's global namespace when it is not given one.
first_sample = inputs.iloc[0]

S_0   = first_sample['underlyings_price']
rho   = first_sample['rho']
T     = first_sample['days_to_maturity']
K     = first_sample['strike']
v_0   = first_sample['volatility']
theta = first_sample['mean_volatility']
kappa = first_sample['reversion']
xi    = first_sample['vol_of_var']
r     = first_sample['rate']

# Price the first sample with 100 time steps and 1000 simulated paths.
generate_heston_paths(S_0, T, r, kappa, theta, v_0, rho, xi,
                      steps=100, num_sims=1000)

20.67090416842931

Test the vector case.

In [406]:
# Price every row of `inputs` in one vectorised run, with the same number of
# time steps and simulations as the scalar test, so the first element can be
# compared with the scalar result.
generate_heston_paths_vec(inputs, steps=100, num_sims=1000)

array([2.06580047e+01, 2.88626794e+01, 0.00000000e+00, 5.10156644e+01,
       2.83876102e+01, 0.00000000e+00, 6.56325257e+00, 0.00000000e+00,
       0.00000000e+00, 6.30495234e+01, 2.05832467e+01, 2.17431547e-01,
       3.09867254e+01, 0.00000000e+00, 5.82422941e+01, 5.68955802e+01,
       0.00000000e+00, 3.58008954e-03, 8.63253013e+01, 3.05207374e+01,
       0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
       0.00000000e+00, 5.12905978e+01, 3.37049898e+01, 8.27501436e+00,
       0.00000000e+00, 0.00000000e+00, 1.55785034e+01, 3.25964477e+01,
       2.14483897e+01, 0.00000000e+00, 6.58572345e-01, 0.00000000e+00,
       1.42670434e+01, 0.00000000e+00, 5.27939582e+01, 0.00000000e+00,
       4.93756441e+01, 1.93799833e+01, 3.87750661e+01, 0.00000000e+00,
       0.00000000e+00, 2.51541375e+01, 6.41401944e+01, 5.59608706e+01,
       0.00000000e+00, 3.61087881e+01])

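We can see that the first element returned by `generate_heston_paths_vec` (≈ 20.66) is approximately the same as the value returned by `generate_heston_paths` (≈ 20.67). Both are Monte Carlo estimates, so a small difference is expected; the agreement verifies that the scalar and vectorised implementations are consistent with each other.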