# 0 Data

In [1]:
import os
import pathlib
import numpy as np
import pandas as pd
import tensorflow as tf

In [2]:
# Project root is two directory levels above the notebook's working directory.
parent_path = str(pathlib.Path.cwd().parent.parent)
print(parent_path)

# Load the raw table and discard the index column that an earlier to_csv left behind.
df = pd.read_csv(os.path.join(parent_path, 'data/data.csv'))
df = df.drop(['Unnamed: 0'], axis=1)

# Attach one synthetic Heston parameter per row, each drawn uniformly at random.
n_rows = len(df)

# long-run average level of the variance process (theta), in [0.001, 0.051)
df['mean_volatility'] = 0.001 + 0.05 * np.random.rand(n_rows)

# mean reversion rate (kappa), in [0.01, 5.01)
df['reversion'] = 0.01 + 5 * np.random.rand(n_rows)

# volatility of the variance process, in [0.01, 0.71)
# NOTE(review): this column is named 'var_of_vol', while generate_heston_paths_vec
# reads a column named 'vol_of_var' -- confirm which spelling consumers expect.
df['var_of_vol'] = 0.01 + 0.7 * np.random.rand(n_rows)

# correlation between the price shocks and the variance shocks, in (-0.75, -0.05]
df['rho'] = -0.05 - 0.7 * np.random.rand(n_rows)

# The row index is written as well (to_csv default).
df.to_csv(os.path.join(parent_path, 'data/basket_data.csv'))

# Preview a few augmented rows; it must be the last expression of the cell
# for the notebook to render it.
df.sample(5)

/Users/customer/projects/UROP-2022


In [3]:
# from google.colab import drive
# drive.mount('/content/drive')
# real_data = pd.read_csv('/content/drive/MyDrive/processed_data.csv')
# real_data.head()

In [3]:
def generate_inputs_nn(real_data, dim, num_samples):
    """
    Draw num_samples synthetic basket-option parameter rows.

    Each row holds 9 entries: two length-dim arrays (spot prices and
    correlations) followed by 7 scalars, i.e. 7 + 2*dim numbers once the
    arrays are flattened.

    Args:
      - real_data: pd dataframe with the columns 'underlyings_price',
                   'days_to_maturity', 'rate' and 'strike'
      - dim: int, the number of underlyings in the basket
      - num_samples: int, number of rows to generate

    Output:
      - inputs: ndarray of dtype object and shape (num_samples, 9); the
                entries of a row are, in order: spot prices, correlations,
                maturity, strike, initial variance, long-term variance,
                reversion rate, vol of vol, rate.
    """
    def resample(column, count):
        # Bootstrap `count` values (with replacement) from a historical column.
        return np.array(real_data[column].sample(n=count, replace=True))

    def uniform(count=num_samples):
        # Uniform draws on [0, 1).
        return np.random.random(count)

    # Historical quantities, resampled independently of one another.
    maturity = resample('days_to_maturity', num_samples)
    rate = resample('rate', num_samples)
    strikes = resample('strike', num_samples)

    # Synthetic Heston parameters, uniform on fixed intervals.
    rate_reversion = 0.01 + 5 * uniform()      # kappa
    vol_of_vol = 0.01 + 0.7 * uniform()        # lambda
    long_term_var = 0.001 + 0.05 * uniform()   # theta
    initial_var = 0.001 + 0.05 * uniform()     # v0

    scalar_block = np.array([maturity, strikes, initial_var, long_term_var,
                             rate_reversion, vol_of_vol, rate])
    scalar_rows = scalar_block.T.tolist()

    rows = []
    for scalars in scalar_rows:
        # Per-row basket: spots first, then correlations (draw order matters
        # for reproducibility under a fixed seed).
        basket_spots = resample('underlyings_price', dim)
        basket_corr = -0.05 - 0.7 * uniform(dim)
        rows.append([basket_spots, basket_corr] + scalars)

    return np.array(rows, dtype=object)

# 1 Model for Scalar Values

In [4]:
def generate_heston_paths(S, T, K, r, kappa, theta, v_0, rho, xi,
                          steps, num_sims):
    '''
    Monte Carlo price of a call on the equally weighted average of a basket,
    for a single parameter set, under Heston-style dynamics.

    Args:
    - S: np.array, contains spot prices of the underlying assets in the basket
    - T: float, time to maturity (days to maturity divided by 365)
    - K: float, strike price
    - r: float, the risk-free rate
    - kappa: float, mean reversion rate of variance
    - theta: float, long-term average variance
    - v_0: float, initial value of the variance process
    - rho: np.array, contains correlations between each random underlying return process and its associated random volatility process
    - xi: float, volatility of variance
    - steps: int, num time steps
    - num_sims: int, no. of simulations for each sample

    Returns:
    - float, discounted average payoff max(mean(S_T) - K, 0) over num_sims paths
    '''
    step_size = T / steps
    n_assets = len(S)
    shock_shape = (num_sims, n_assets)

    # One row of spot prices per simulated path.
    prices = np.repeat([S], num_sims, axis=0)
    # Column vector of variances; it broadcasts to one variance per asset
    # after the first update.
    variance = v_0 + np.zeros(num_sims)[:, np.newaxis]

    for _ in range(steps):
        price_shock = np.random.normal(0, 1, size=shock_shape)
        indep_shock = np.random.normal(0, 1, size=shock_shape)
        # Variance shock correlated with the price shock through rho.
        var_shock = rho * price_shock + np.sqrt(1 - rho**2) * indep_shock

        # Clip negative variances to zero before they are used.
        variance = np.maximum(variance, 0)
        diffusion = np.sqrt(variance * step_size)
        prices = prices * np.exp((r - 0.5 * variance) * step_size + diffusion * price_shock)
        variance = variance + kappa * (theta - variance) * step_size + xi * diffusion * var_shock

    # Basket level = arithmetic mean of the terminal asset prices on each path.
    basket = np.mean(prices, axis=1)
    payoff_total = np.sum(np.maximum(basket - K, 0))
    return np.exp(-1 * r * T) * payoff_total / num_sims

# 2 Model for Vector Values

In [5]:
def generate_heston_paths_vec(df, num_samples, steps=1000, num_sims=100000):
    '''
    Monte Carlo prices of basket call options for every row of df at once.

    Args:
        - df: dataframe with one option per row and the columns
              'underlyings_price' (array of spot prices), 'rho' (array of
              correlations, same length), 'days_to_maturity', 'strike',
              'volatility', 'mean_volatility', 'reversion', 'vol_of_var'
              and 'rate'
        - num_samples: kept for backward compatibility and ignored; the
              number of options priced is always len(df)
        - steps: int, num time steps
        - num_sims: int, no. of simulations for each sample

    Output:
        - result: ndarray of shape (len(df),), discounted average call
                  payoff over num_sims paths for each row
    '''
    n_rows = len(df)
    if n_rows == 0:
        return np.zeros(0)

    def column(name):
        # Columns may arrive with object dtype; cast once here so the
        # simulation loop works on plain float arrays.
        return df[name].values.astype('float')

    # NOTE(review): maturity is used in whatever unit 'days_to_maturity' is
    # stored in, while generate_heston_paths documents T as days / 365 --
    # confirm the intended unit before relying on the absolute price level.
    T = column('days_to_maturity')
    dt = T / steps
    v_0 = column('volatility')
    r = column('rate')
    theta = column('mean_volatility')
    kappa = column('reversion')
    xi = column('vol_of_var')
    K = column('strike')[:, np.newaxis]

    # (n_rows, dim) matrices built from the per-row arrays.
    spots = np.array([np.asarray(s, dtype='float') for s in df['underlyings_price'].values])
    rho = np.array([np.asarray(c, dtype='float') for c in df['rho'].values]).T  # (dim, n_rows)
    dim = spots.shape[1]
    rho_orth = np.sqrt(1 - rho**2)  # loop invariant

    # Simulation state has shape (num_sims, dim, n_rows). The variance starts
    # with a singleton asset axis and broadcasts to dim on the first update.
    S_t = np.broadcast_to(spots.T, (num_sims, dim, n_rows))
    v_t = np.broadcast_to(v_0, (num_sims, 1, n_rows))

    for _ in range(steps):
        WT1 = np.random.normal(0, 1, size=(num_sims, dim, n_rows))
        WT2 = np.random.normal(0, 1, size=(num_sims, dim, n_rows))
        # Variance shock correlated with the price shock through rho.
        WT3 = rho * WT1 + rho_orth * WT2

        # Clip negative variances to zero before they are used.
        v_t = np.maximum(v_t, 0)
        diffusion = np.sqrt(v_t * dt)
        S_t = S_t * np.exp((r - 0.5 * v_t) * dt + diffusion * WT1)
        v_t = v_t + kappa * (theta - v_t) * dt + xi * diffusion * WT3

    # Basket level per path = mean over the asset axis -> (n_rows, num_sims).
    basket = S_t.mean(axis=1).T
    payoff = np.sum(np.maximum(basket - K, 0), axis=1) / num_sims
    S_call = np.exp(-1 * r * T) * payoff

    return S_call

# 3 Test implementation

In [9]:
# Settings for the small smoke test below.
dim = 7             # dimension of the basket (number of stocks)
num_samples = 10    # number of synthetic option rows to generate

# Column labels, in the order generate_inputs_nn lays out each row.
feature_columns = [
    'underlyings_price', 'rho', 'days_to_maturity',
    'strike', 'volatility', 'mean_volatility', 'reversion', 'vol_of_var', 'rate',
]

inputs_array = generate_inputs_nn(df, dim, num_samples)
inputs = pd.DataFrame(inputs_array, columns=feature_columns)
print(inputs.shape)
inputs.head()

(10, 9)


Unnamed: 0,underlyings_price,rho,days_to_maturity,strike,volatility,mean_volatility,reversion,vol_of_var,rate
0,"[142.45, 10.94219970703125, 14.87875, 4.774700...","[-0.5253102894081183, -0.08267862449961322, -0...",7.0,15.6,0.022738,0.050026,3.757348,0.666204,0.024338
1,"[16.427130126953124, 11.81739990234375, 16.604...","[-0.15359162125842643, -0.22446354245062333, -...",16.0,13.2,0.02319,0.039932,2.800199,0.188256,0.001678
2,"[13.388599853515624, 15.79324951171875, 15.905...","[-0.35742871484591116, -0.7080891695097145, -0...",91.0,4.8,0.029186,0.012507,1.904727,0.271797,0.002458
3,"[13.995, 4.774700012207031, 4.701400146484375,...","[-0.3178805513544719, -0.48628350712969587, -0...",8.0,15.8,0.038362,0.0349,2.66674,0.648392,0.051413
4,"[13.92875, 16.734720458984373, 57.633999023437...","[-0.09894896681770923, -0.49947798081183575, -...",303.0,4.7,0.036798,0.008966,1.362616,0.583793,0.054251


Test the scalar case.

In [10]:
# Unpack the parameters of the first generated option (row 0 of `inputs`).
# NOTE(review): T is taken straight from 'days_to_maturity', whereas
# generate_heston_paths documents T as days / 365 -- confirm the unit.
_row = 0
S_0, rho, T, K = (
    inputs[name].values[_row]
    for name in ('underlyings_price', 'rho', 'days_to_maturity', 'strike')
)
v_0, theta, kappa, xi, r = (
    inputs[name].values[_row]
    for name in ('volatility', 'mean_volatility', 'reversion', 'vol_of_var', 'rate')
)

# Price that single option with 100 time steps and 1000 simulated paths.
generate_heston_paths(S_0, T, K, r, kappa, theta, v_0, rho, xi,
                      steps=100, num_sims=1000)

18.77136022203655

Test the vector case.

In [11]:
# Price every row of `inputs` in one vectorised run, using 100 time steps
# and 1000 simulated paths per row.
result = generate_heston_paths_vec(inputs, num_samples, steps=100, num_sims=1000)

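Comparing the first element of the output of `generate_heston_paths_vec` with the value returned by `generate_heston_paths` above, we can see that the two are approximately the same. Both are independent Monte Carlo estimates, so they are not expected to match exactly; their agreement indicates that the vectorised model is consistent with the scalar one.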

In [13]:
# Store the simulated prices next to the parameters that produced them.
inputs['contract_price'] = result

In [15]:
def flattenDim(dim, inputs):
    """
    Returns a pandas dataframe with asset price and rho expanded into one
    scalar column per basket component.

    The frame passed in is left untouched; all changes are made on a copy.

    Args:
        - dim: dimension of the asset price basket (number of components
               to expand)
        - inputs: pandas dataframe of basket options, containing columns
                  'underlyings_price' and 'rho', each cell holding a
                  sequence with at least dim entries

    Output:
        - outputs: pandas dataframe with 'underlyings_price' and 'rho'
                   removed and the columns Underlying_0..Underlying_{dim-1}
                   followed by Rho_0..Rho_{dim-1} appended
    """
    flat = inputs.copy()

    if dim > 0:
        # Stack the per-row arrays once, not once per component.
        spot_matrix = np.vstack(list(inputs['underlyings_price'].values))
        rho_matrix = np.vstack(list(inputs['rho'].values))

        for i in range(dim):
            flat[f'Underlying_{i}'] = spot_matrix[:, i]

        for i in range(dim):
            flat[f'Rho_{i}'] = rho_matrix[:, i]

    return flat.drop(['underlyings_price', 'rho'], axis=1)

# Expand the basket columns using the basket size configured above (`dim`)
# so the two cannot drift apart through a duplicated literal.
inputs = flattenDim(dim, inputs)
inputs.head()

Unnamed: 0,days_to_maturity,strike,volatility,mean_volatility,reversion,vol_of_var,rate,contract_price,Underlying_0,Underlying_1,...,Underlying_4,Underlying_5,Underlying_6,Rho_0,Rho_1,Rho_2,Rho_3,Rho_4,Rho_5,Rho_6
0,7.0,15.6,0.022738,0.050026,3.757348,0.666204,0.024338,17.028448,142.45,10.9422,...,15.115,17.5255,14.72712,-0.52531,-0.082679,-0.147531,-0.555332,-0.701763,-0.404381,-0.596128
1,16.0,13.2,0.02319,0.039932,2.800199,0.188256,0.001678,1.221938,16.42713,11.8174,...,10.61088,8.01,4.5127,-0.153592,-0.224464,-0.537282,-0.596806,-0.184586,-0.457355,-0.252248
2,91.0,4.8,0.029186,0.012507,1.904727,0.271797,0.002458,7.387472,13.3886,15.79325,...,4.804,7.626,15.61337,-0.357429,-0.708089,-0.138399,-0.389144,-0.244073,-0.157685,-0.149508
3,8.0,15.8,0.038362,0.0349,2.66674,0.648392,0.051413,2.112051,13.995,4.7747,...,15.1235,15.39725,6.6458,-0.317881,-0.486284,-0.465103,-0.405701,-0.068419,-0.643844,-0.523429
4,303.0,4.7,0.036798,0.008966,1.362616,0.583793,0.054251,16.669164,13.92875,16.73472,...,13.394,10.6325,60.833999,-0.098949,-0.499478,-0.105192,-0.135804,-0.09167,-0.506185,-0.340715


In [16]:
# Write the flattened table to <parent_path>/data/small_basket_data.csv.
# The row index is written too (to_csv default), so it will presumably come
# back as an 'Unnamed: 0' column when the file is read again.
inputs.to_csv(os.path.join(parent_path, 'data/small_basket_data.csv'))