In [1]:
import numpy as np
import pandas as pd
from sklearn.utils import resample
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models, layers, activations, initializers

from EnsembleNeuralNetworks import ENN
from BSModel import black_scholes_call_option, black_scholes_put_option

In [2]:
# Load the raw option table and keep only the columns used by the rest of the notebook.
# NOTE(review): the CSV location is an absolute, machine-specific path; it must be
# adjusted before this cell can run on another machine.
ES_EZ = pd.read_csv('/Users/gaojinglun/Desktop/RSG/data/ES_EZ.csv')
# 'Unnamed: 0' is presumably a row index written out with the CSV -- confirm.
ES_EZ = ES_EZ.drop(columns = ['Unnamed: 0'])

kept_columns = ['Put.Call', 'Strike.Price', 'Settlement', 'Historical Vol',
                'futures.price', 'Time.to.maturity', 'Risk.Free.Rate', 'Implied.Volatility']
# Rows with a missing value in any kept column are discarded.
df = ES_EZ[kept_columns].dropna()
df.head()

Unnamed: 0,Put.Call,Strike.Price,Settlement,Historical Vol,futures.price,Time.to.maturity,Risk.Free.Rate,Implied.Volatility
0,C,2200,1059.0,0.008335,3259.0,0.213699,0.0154,0.338267
1,C,2250,1009.1,0.008335,3259.0,0.213699,0.0154,0.326829
2,C,2270,989.1,0.008335,3259.0,0.213699,0.0154,0.323456
3,C,2300,959.2,0.008335,3259.0,0.213699,0.0154,0.317834
4,C,2310,949.2,0.008335,3259.0,0.213699,0.0154,0.314282


In [3]:
# Ensemble size: passed as `n_estimators` to both the call and the put ENN below.
n_estimators = 10

### Call Options

In [4]:
# Split the call options into moneyness buckets, using the strike expressed as a
# percentage of the futures price:
#   ITM: below 95.2    ATM: [95.2, 103)    OTM: 103 and above
ES_EZ_call = df[df['Put.Call'] == 'C']
Call_temp = 100 * ES_EZ_call['Strike.Price'].values / ES_EZ_call['futures.price'].values
Call_ITM_idx = Call_temp < 95.2
Call_ATM_idx = np.logical_and(Call_temp < 103, Call_temp >= 95.2)
# `>=` rather than `>`: with a strict comparison a ratio of exactly 103 matched
# none of the three masks and was silently left out of every bucket.
Call_OTM_idx = Call_temp >= 103
print('The number of ITM Call options is', np.sum(Call_ITM_idx))
print('The number of ATM Call options is', np.sum(Call_ATM_idx))
print('The number of OTM Call options is', np.sum(Call_OTM_idx))

The number of ITM Call options is 6678
The number of ATM Call options is 2223
The number of OTM Call options is 4807


In [5]:
# Per-row moneyness label for the call options. A row flagged by neither the ATM
# nor the OTM mask gets 'ITM'; OTM takes precedence over ATM, matching the order
# in which the labels were originally assigned.
Call_moneyness = np.where(Call_OTM_idx, 'OTM', np.where(Call_ATM_idx, 'ATM', 'ITM'))

# Model inputs (features) and target (implied volatility) for the call options.
call_feature_cols = ['futures.price', 'Strike.Price', 'Time.to.maturity',
                     'Risk.Free.Rate', 'Historical Vol']
ES_EZ_call_X = ES_EZ_call[call_feature_cols]
ES_EZ_call_y = ES_EZ_call['Implied.Volatility']
ES_EZ_call_X.shape

(13708, 5)

In [6]:
# Arguments handed to ENN.fit: LR (presumably learning rates -- confirm against
# EnsembleNeuralNetworks), the feature columns and the target column.
LR = [1e-2, 1e-3]
xcol = ['futures.price', 'Strike.Price', 'Time.to.maturity', 'Risk.Free.Rate', 'Historical Vol']
ycol = 'Implied.Volatility'

# Build the call ensemble, resample the call data with the moneyness labels as
# the stratification variable, then fit.
# NOTE(review): no random seed is set in the visible cells, so the recorded
# scores may not reproduce exactly on a re-run.
ENN_Call = ENN(n_estimators = n_estimators)
ENN_Call.bootstrap(data = ES_EZ_call, stratification = Call_moneyness, trainsize = 0.8)
ENN_Call.fit(LR, xcol, ycol)

In [7]:
# Report the R^2 pair of every fitted call model (one printed line per model).
ENN_Call.get_results()

The training R^2 for model 0 is 0.8885 and 0.8881, respectively
The training R^2 for model 1 is 0.8787 and 0.8672, respectively
The training R^2 for model 2 is 0.9115 and 0.8991, respectively
The training R^2 for model 3 is 0.8693 and 0.8778, respectively
The training R^2 for model 4 is 0.8876 and 0.8872, respectively
The training R^2 for model 5 is 0.8959 and 0.8925, respectively
The training R^2 for model 6 is 0.9549 and 0.9539, respectively
The training R^2 for model 7 is 0.9326 and 0.9165, respectively
The training R^2 for model 8 is 0.9382 and 0.9347, respectively
The training R^2 for model 9 is 0.9213 and 0.9255, respectively


In [8]:
# Mean and standard deviation of the per-model R^2 across the call ensemble;
# column 0 of `results` is reported as training, column 1 as testing.
call_r2_train = ENN_Call.results[:, 0]
call_r2_test = ENN_Call.results[:, 1]

print('The training R^2 for call option is {} +/- {}.'.format(
    np.round(np.mean(call_r2_train), 4),
    np.round(np.std(call_r2_train), 4),
))
print('The testing R^2 for call option is {} +/- {}.'.format(
    np.round(np.mean(call_r2_test), 4),
    np.round(np.std(call_r2_test), 4),
))

The training R^2 for call option is 0.9078 +/- 0.0268.
The testing R^2 for call option is 0.9042 +/- 0.026.


In [9]:
# Raw per-model R^2 table for the call ensemble: one row per model, two columns.
ENN_Call.results

array([[0.8885, 0.8881],
       [0.8787, 0.8672],
       [0.9115, 0.8991],
       [0.8693, 0.8778],
       [0.8876, 0.8872],
       [0.8959, 0.8925],
       [0.9549, 0.9539],
       [0.9326, 0.9165],
       [0.9382, 0.9347],
       [0.9213, 0.9255]])

In [10]:
# Min-max scale the call features and target.
# The scalers are always fitted on columns taken straight from `ES_EZ_call`, never
# on `ES_EZ_call_X` / `ES_EZ_call_y`, because this cell overwrites those two names
# with their scaled versions. Reading them back made the cell fail on a second run
# (`ES_EZ_call_y` is an ndarray by then and has no `.values`) and would refit the
# feature scaler on already-scaled data.
# NOTE(review): assumes ENN.predict expects inputs scaled over the full call set
# in this way -- confirm against EnsembleNeuralNetworks.
call_features_raw = ES_EZ_call[['futures.price', 'Strike.Price', 'Time.to.maturity',
                                'Risk.Free.Rate', 'Historical Vol']]
call_target_raw = ES_EZ_call['Implied.Volatility'].values.reshape(-1, 1)

scalerX_call = MinMaxScaler().fit(call_features_raw)
ES_EZ_call_X = pd.DataFrame(scalerX_call.transform(call_features_raw),
                            columns = call_features_raw.columns.values)
scalery_call = MinMaxScaler().fit(call_target_raw)
ES_EZ_call_y = scalery_call.transform(call_target_raw)

In [11]:
# R^2 of the call ensemble's prediction over the whole call set, measured against
# the min-max-scaled implied volatility.
call_iv_scaled_pred = ENN_Call.predict(ES_EZ_call_X)
np.round(r2_score(ES_EZ_call_y, call_iv_scaled_pred), 4)

0.9365

### Put Options

In [12]:
# Split the put options into moneyness buckets, using the strike expressed as a
# percentage of the futures price (mirror image of the call split):
#   OTM: below 95.2    ATM: [95.2, 103)    ITM: 103 and above
ES_EZ_put = df[df['Put.Call'] == 'P']
Put_temp = 100 * ES_EZ_put['Strike.Price'].values / ES_EZ_put['futures.price'].values
# `>=` rather than `>`: with a strict comparison a ratio of exactly 103 matched
# none of the three masks and was silently left out of every bucket.
Put_ITM_idx = Put_temp >= 103
Put_ATM_idx = np.logical_and(Put_temp < 103, Put_temp >= 95.2)
Put_OTM_idx = Put_temp < 95.2
# These counts describe puts; the labels previously said "Call".
print('The number of ITM Put options is', np.sum(Put_ITM_idx))
print('The number of ATM Put options is', np.sum(Put_ATM_idx))
print('The number of OTM Put options is', np.sum(Put_OTM_idx))

The number of ITM Call options is 3123
The number of ATM Call options is 2234
The number of OTM Call options is 10906


In [13]:
# Per-row moneyness label for the put options. A row flagged by neither the ATM
# nor the OTM mask gets 'ITM'; OTM takes precedence over ATM, matching the order
# in which the labels were originally assigned.
Put_moneyness = np.where(Put_OTM_idx, 'OTM', np.where(Put_ATM_idx, 'ATM', 'ITM'))

# Model inputs (features) and target (implied volatility) for the put options.
put_feature_cols = ['futures.price', 'Strike.Price', 'Time.to.maturity',
                    'Risk.Free.Rate', 'Historical Vol']
ES_EZ_put_X = ES_EZ_put[put_feature_cols]
ES_EZ_put_y = ES_EZ_put['Implied.Volatility']
ES_EZ_put_X.shape

(16263, 5)

In [14]:
# Arguments handed to ENN.fit: LR (presumably learning rates -- confirm against
# EnsembleNeuralNetworks), the feature columns and the target column.
LR = [1e-2, 1e-3]
xcol = ['futures.price', 'Strike.Price', 'Time.to.maturity', 'Risk.Free.Rate', 'Historical Vol']
ycol = 'Implied.Volatility'

# Build the put ensemble, resample the put data with the moneyness labels as
# the stratification variable, then fit.
# NOTE(review): no random seed is set in the visible cells, so the recorded
# scores may not reproduce exactly on a re-run.
ENN_Put = ENN(n_estimators = n_estimators)
ENN_Put.bootstrap(data = ES_EZ_put, stratification = Put_moneyness, trainsize = 0.8)
ENN_Put.fit(LR, xcol, ycol)

In [15]:
# Report the R^2 pair of every fitted put model (one printed line per model).
ENN_Put.get_results()

The training R^2 for model 0 is 0.9492 and 0.9442, respectively
The training R^2 for model 1 is 0.9467 and 0.9448, respectively
The training R^2 for model 2 is 0.9565 and 0.9559, respectively
The training R^2 for model 3 is 0.9507 and 0.9483, respectively
The training R^2 for model 4 is 0.9511 and 0.9443, respectively
The training R^2 for model 5 is 0.8906 and 0.8862, respectively
The training R^2 for model 6 is 0.9346 and 0.9322, respectively
The training R^2 for model 7 is 0.9483 and 0.946, respectively
The training R^2 for model 8 is 0.9477 and 0.9464, respectively
The training R^2 for model 9 is 0.9166 and 0.9159, respectively


In [16]:
# Mean and standard deviation of the per-model R^2 across the put ensemble;
# column 0 of `results` is reported as training, column 1 as testing.
put_r2_train = ENN_Put.results[:, 0]
put_r2_test = ENN_Put.results[:, 1]

print('The training R^2 for put option is {} +/- {}.'.format(
    np.round(np.mean(put_r2_train), 4),
    np.round(np.std(put_r2_train), 4),
))
print('The testing R^2 for put option is {} +/- {}.'.format(
    np.round(np.mean(put_r2_test), 4),
    np.round(np.std(put_r2_test), 4),
))

The training R^2 for put option is 0.9392 +/- 0.0194.
The testing R^2 for put option is 0.9364 +/- 0.0197.


In [17]:
# Raw per-model R^2 table for the put ensemble: one row per model, two columns.
ENN_Put.results

array([[0.9492, 0.9442],
       [0.9467, 0.9448],
       [0.9565, 0.9559],
       [0.9507, 0.9483],
       [0.9511, 0.9443],
       [0.8906, 0.8862],
       [0.9346, 0.9322],
       [0.9483, 0.946 ],
       [0.9477, 0.9464],
       [0.9166, 0.9159]])

In [18]:
# Min-max scale the put features and target.
# The scalers are always fitted on columns taken straight from `ES_EZ_put`, never
# on `ES_EZ_put_X` / `ES_EZ_put_y`, because this cell overwrites those two names
# with their scaled versions. Reading them back made the cell fail on a second run
# (`ES_EZ_put_y` is an ndarray by then and has no `.values`) and would refit the
# feature scaler on already-scaled data.
# NOTE(review): assumes ENN.predict expects inputs scaled over the full put set
# in this way -- confirm against EnsembleNeuralNetworks.
put_features_raw = ES_EZ_put[['futures.price', 'Strike.Price', 'Time.to.maturity',
                              'Risk.Free.Rate', 'Historical Vol']]
put_target_raw = ES_EZ_put['Implied.Volatility'].values.reshape(-1, 1)

scalerX_put = MinMaxScaler().fit(put_features_raw)
ES_EZ_put_X = pd.DataFrame(scalerX_put.transform(put_features_raw),
                           columns = put_features_raw.columns.values)
scalery_put = MinMaxScaler().fit(put_target_raw)
ES_EZ_put_y = scalery_put.transform(put_target_raw)

In [19]:
# R^2 of the put ensemble's prediction over the whole put set, measured against
# the min-max-scaled implied volatility.
put_iv_scaled_pred = ENN_Put.predict(ES_EZ_put_X)
np.round(r2_score(ES_EZ_put_y, put_iv_scaled_pred), 4)

0.9596

### Summary

In [20]:
# Quoted settlement prices per moneyness bucket; these are the reference values
# that Call_Results / Put_Results score predicted prices against.
Call_ALL = ES_EZ_call['Settlement']
Call_ITM = ES_EZ_call.loc[Call_ITM_idx, 'Settlement']
Call_ATM = ES_EZ_call.loc[Call_ATM_idx, 'Settlement']
Call_OTM = ES_EZ_call.loc[Call_OTM_idx, 'Settlement']

Put_ALL = ES_EZ_put['Settlement']
Put_ITM = ES_EZ_put.loc[Put_ITM_idx, 'Settlement']
Put_ATM = ES_EZ_put.loc[Put_ATM_idx, 'Settlement']
Put_OTM = ES_EZ_put.loc[Put_OTM_idx, 'Settlement']

def Call_Results(Call_Pred):
    '''
    Score predicted call prices against the quoted settlements.

    Call_Pred holds one predicted price per row of ES_EZ_call and must accept the
    boolean moneyness masks as an index.

    Returns a 4-tuple of R^2 values rounded to 5 decimals, in the order
    (ITM, ATM, OTM, overall).
    '''
    scored_pairs = (
        (Call_ITM, Call_Pred[Call_ITM_idx]),
        (Call_ATM, Call_Pred[Call_ATM_idx]),
        (Call_OTM, Call_Pred[Call_OTM_idx]),
        (Call_ALL, Call_Pred),
    )
    return tuple(np.round(r2_score(actual, predicted), 5) for actual, predicted in scored_pairs)

def Put_Results(Put_Pred):
    '''
    Score predicted put prices against the quoted settlements.

    Put_Pred holds one predicted price per row of ES_EZ_put and must accept the
    boolean moneyness masks as an index.

    Returns a 4-tuple of R^2 values rounded to 5 decimals, in the order
    (ITM, ATM, OTM, overall).
    '''
    scored_pairs = (
        (Put_ITM, Put_Pred[Put_ITM_idx]),
        (Put_ATM, Put_Pred[Put_ATM_idx]),
        (Put_OTM, Put_Pred[Put_OTM_idx]),
        (Put_ALL, Put_Pred),
    )
    return tuple(np.round(r2_score(actual, predicted), 5) for actual, predicted in scored_pairs)

In [21]:
# Undo the target scaling so the ensemble predictions are back in implied-volatility units.
call_iv_scaled = ENN_Call.predict(ES_EZ_call_X)
ENN_call = scalery_call.inverse_transform(call_iv_scaled)

put_iv_scaled = ENN_Put.predict(ES_EZ_put_X)
ENN_put = scalery_put.inverse_transform(put_iv_scaled)

In [22]:
# Evaluate the ENN at price level: feed its predicted implied volatilities into the
# Black-Scholes pricers and compare the resulting prices with the settlements.
# NOTE(review): the fourth positional argument is an all-zero array; its meaning is
# defined in BSModel (presumably a dividend yield) -- confirm.
call_bs_args = (
    ES_EZ_call['futures.price'],
    ES_EZ_call['Strike.Price'],
    ES_EZ_call['Time.to.maturity'],
    np.zeros(len(ES_EZ_call)),
    ES_EZ_call['Risk.Free.Rate'],
    ENN_call.reshape(-1),
)
ENN_Call_Pred = black_scholes_call_option(*call_bs_args)

put_bs_args = (
    ES_EZ_put['futures.price'],
    ES_EZ_put['Strike.Price'],
    ES_EZ_put['Time.to.maturity'],
    np.zeros(len(ES_EZ_put)),
    ES_EZ_put['Risk.Free.Rate'],
    ENN_put.reshape(-1),
)
ENN_PUT_Pred = black_scholes_put_option(*put_bs_args)

call_price_r2 = np.round(r2_score(ES_EZ_call['Settlement'], ENN_Call_Pred), 5)
print('The total variance explained by BS is {} for the Call Options'.format(call_price_r2))

put_price_r2 = np.round(r2_score(ES_EZ_put['Settlement'], ENN_PUT_Pred), 5)
print('The total variance explained by BS is {} for the Put Options'.format(put_price_r2))

The total variance explained by BS is 0.99868 for the Call Options
The total variance explained by BS is 0.99651 for the Put Options


In [23]:
# Price-level R^2 of the ENN-based call prices as (ITM, ATM, OTM, overall).
Call_Results(ENN_Call_Pred)

(0.99826, 0.95193, 0.9297, 0.99868)

In [24]:
# Price-level R^2 of the ENN-based put prices as (ITM, ATM, OTM, overall).
Put_Results(ENN_PUT_Pred)

(0.9948, 0.95646, 0.97637, 0.99651)