# Summary of ES & EZ data

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as si

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import r2_score

import tensorflow as tf
from tensorflow import keras

from tabulate import tabulate
# Show NumPy arrays with 3 decimals and without scientific notation.
np.set_printoptions(suppress = True, precision = 3)

In [2]:
# Load the raw option data and discard the 'Unnamed: 0' column from the CSV.
ES_EZ = pd.read_csv('/Users/gaojinglun/Desktop/RSG/ES_EZ.csv').drop(columns = ['Unnamed: 0'])

# Keep only the columns used by the pricing and ANN steps below.
df = ES_EZ.loc[:, [
    'Put.Call',
    'futures.price',
    'Strike.Price',
    'Time.to.maturity',
    'Risk.Free.Rate',
    'Historical Vol',
    'Implied.Volatility',
    'Settlement',
]]
df.head()

Unnamed: 0,Put.Call,futures.price,Strike.Price,Time.to.maturity,Risk.Free.Rate,Historical Vol,Implied.Volatility,Settlement
0,C,3259.0,2200,0.213699,0.0154,0.008335,0.338267,1059.0
1,C,3259.0,2250,0.213699,0.0154,0.008335,0.326829,1009.1
2,C,3259.0,2270,0.213699,0.0154,0.008335,0.323456,989.1
3,C,3259.0,2300,0.213699,0.0154,0.008335,0.317834,959.2
4,C,3259.0,2310,0.213699,0.0154,0.008335,0.314282,949.2


In [3]:
# Split by option type ('C' = call, 'P' = put); the type column is constant
# inside each subset, so it is dropped.
ES_EZ_call = df.loc[df['Put.Call'] == 'C'].drop(columns = ['Put.Call'])
ES_EZ_put = df.loc[df['Put.Call'] == 'P'].drop(columns = ['Put.Call'])

In [4]:
# Count each option type from its own label so the numbers agree with the
# 'C' / 'P' filters that build ES_EZ_call and ES_EZ_put. Deriving the put count
# as "all rows minus calls" would silently count any other label as a put.
print('There are {} call options and {} put options in the ES&EZ data.'.format(
    np.sum(ES_EZ['Put.Call'] == 'C'),
    np.sum(ES_EZ['Put.Call'] == 'P')))

There are 13708 call options and 16263 put options in the ES&EZ data.


In [5]:
# Strike as a percentage of the underlying price, one value per call option.
Call_temp = 100 * ES_EZ_call['Strike.Price'].values / ES_EZ_call['futures.price'].values

# Moneyness buckets for calls: ITM below 95.2, ATM in [95.2, 103), OTM from 103 up.
Call_ITM_idx = Call_temp < 95.2
Call_ATM_idx = np.logical_and(Call_temp < 103, Call_temp >= 95.2)
# Use >= so a ratio of exactly 103 lands in the OTM bucket; with a strict >
# such an option would belong to no bucket at all.
Call_OTM_idx = Call_temp >= 103

print('The number of ITM Call options is', np.sum(Call_ITM_idx))
print('The number of ATM Call options is', np.sum(Call_ATM_idx))
print('The number of OTM Call options is', np.sum(Call_OTM_idx))

The number of ITM Call options is 6678
The number of ATM Call options is 2223
The number of OTM Call options is 4807


In [6]:
# Strike as a percentage of the underlying price, one value per put option.
Put_temp = 100 * ES_EZ_put['Strike.Price'].values / ES_EZ_put['futures.price'].values

# Moneyness buckets for puts mirror the call buckets: ITM from 103 up,
# ATM in [95.2, 103), OTM below 95.2.
# Use >= so a ratio of exactly 103 lands in the ITM bucket; with a strict >
# such an option would belong to no bucket at all.
Put_ITM_idx = Put_temp >= 103
Put_ATM_idx = np.logical_and(Put_temp < 103, Put_temp >= 95.2)
Put_OTM_idx = Put_temp < 95.2

# These are put counts; the labels previously said "Call".
print('The number of ITM Put options is', np.sum(Put_ITM_idx))
print('The number of ATM Put options is', np.sum(Put_ATM_idx))
print('The number of OTM Put options is', np.sum(Put_OTM_idx))

The number of ITM Call options is 3123
The number of ATM Call options is 2234
The number of OTM Call options is 10906


In [7]:
def black_scholes_call_option(S, K, T, q, r, sigma):
    '''
    Black-Scholes price of a European call with a continuous dividend yield.

    S: Stock price
    K: Strike price
    T: Maturity
    q: Dividend rate
    r: Risk free rate
    sigma: Volatility

    Returns the call price. All operations are element-wise NumPy calls, so
    scalars and equally sized arrays / Series can be passed.
    '''
    # Total volatility over the option's life; shared by d1 and d2.
    vol_sqrt_t = sigma * np.sqrt(T)

    d1 = (np.log(S / K) + (r - q + 0.5 * sigma ** 2) * T) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t

    # Underlying discounted at the dividend rate, strike at the risk-free rate.
    discounted_underlying = S * np.exp(-q * T)
    discounted_strike = K * np.exp(-r * T)

    return discounted_underlying * si.norm.cdf(d1) - discounted_strike * si.norm.cdf(d2)

def black_scholes_put_option(S, K, T, q, r, sigma):
    '''
    Black-Scholes price of a European put with a continuous dividend yield.

    S: Stock price
    K: Strike price
    T: Maturity
    q: Dividend rate
    r: Risk free rate
    sigma: Volatility

    Returns the put price. All operations are element-wise NumPy calls, so
    scalars and equally sized arrays / Series can be passed.
    '''
    # Total volatility over the option's life; shared by d1 and d2.
    vol_sqrt_t = sigma * np.sqrt(T)

    d1 = (np.log(S / K) + (r - q + 0.5 * sigma ** 2) * T) / vol_sqrt_t
    d2 = d1 - vol_sqrt_t

    # Strike discounted at the risk-free rate, underlying at the dividend rate.
    discounted_strike = K * np.exp(-r * T)
    discounted_underlying = S * np.exp(-q * T)

    return discounted_strike * si.norm.cdf(-d2) - discounted_underlying * si.norm.cdf(-d1)

def Call_Results(Call_Pred):
    '''
    R^2 of predicted call prices against observed settlements.

    Call_Pred: predicted prices, one per row of the call data, in the same order
               as the boolean masks Call_ITM_idx / Call_ATM_idx / Call_OTM_idx.

    Returns a 4-tuple (ITM, ATM, OTM, Overall), each rounded to 5 decimals.

    Reads the module-level Call_ITM, Call_ATM, Call_OTM and Call_ALL series,
    which must be defined before this function is called.
    '''
    # (observed, predicted) pairs in the order ITM, ATM, OTM, overall.
    observed_and_predicted = (
        (Call_ITM, Call_Pred[Call_ITM_idx]),
        (Call_ATM, Call_Pred[Call_ATM_idx]),
        (Call_OTM, Call_Pred[Call_OTM_idx]),
        (Call_ALL, Call_Pred),
    )
    return tuple(
        np.round(r2_score(observed, predicted), 5)
        for observed, predicted in observed_and_predicted
    )

def Put_Results(Put_Pred):
    '''
    R^2 of predicted put prices against observed settlements.

    Put_Pred: predicted prices, one per row of the put data, in the same order
              as the boolean masks Put_ITM_idx / Put_ATM_idx / Put_OTM_idx.

    Returns a 4-tuple (ITM, ATM, OTM, Overall), each rounded to 5 decimals.

    Reads the module-level Put_ITM, Put_ATM, Put_OTM and Put_ALL series,
    which must be defined before this function is called.
    '''
    # (observed, predicted) pairs in the order ITM, ATM, OTM, overall.
    observed_and_predicted = (
        (Put_ITM, Put_Pred[Put_ITM_idx]),
        (Put_ATM, Put_Pred[Put_ATM_idx]),
        (Put_OTM, Put_Pred[Put_OTM_idx]),
        (Put_ALL, Put_Pred),
    )
    return tuple(
        np.round(r2_score(observed, predicted), 5)
        for observed, predicted in observed_and_predicted
    )

## Results of BS with implied volatility as input

In [8]:
# Price every option with Black-Scholes using the quoted implied volatility,
# then report how much of the settlement-price variance the model explains.
# NOTE(review): the underlying column is named 'futures.price' while the dividend
# rate q is set to 0 -- if these are options on futures, confirm that q = 0
# (rather than q = r) is the intended convention.
BS_Call_ImpliedVol = black_scholes_call_option(
    S = ES_EZ_call['futures.price'],
    K = ES_EZ_call['Strike.Price'],
    T = ES_EZ_call['Time.to.maturity'],
    q = np.zeros(len(ES_EZ_call)),
    r = ES_EZ_call['Risk.Free.Rate'],
    sigma = ES_EZ_call['Implied.Volatility'],
)
BS_PUT_ImpliedVol = black_scholes_put_option(
    S = ES_EZ_put['futures.price'],
    K = ES_EZ_put['Strike.Price'],
    T = ES_EZ_put['Time.to.maturity'],
    q = np.zeros(len(ES_EZ_put)),
    r = ES_EZ_put['Risk.Free.Rate'],
    sigma = ES_EZ_put['Implied.Volatility'],
)

print(
    'The total variance explained by BS is {} for the Call Options'.format(
        np.round(r2_score(ES_EZ_call['Settlement'], BS_Call_ImpliedVol), 5)
    )
)
print(
    'The total variance explained by BS is {} for the Put Options'.format(
        np.round(r2_score(ES_EZ_put['Settlement'], BS_PUT_ImpliedVol), 5)
    )
)

The total variance explained by BS is 0.9994 for the Call Options
The total variance explained by BS is 0.999 for the Put Options


## Results of BS with historical volatility as input

In [9]:
# Price every option with Black-Scholes using the historical volatility column,
# then report how much of the settlement-price variance the model explains.
# NOTE(review): in the sample rows 'Historical Vol' is about 0.008 while
# 'Implied.Volatility' is about 0.33 -- presumably the two are on different
# time scales; confirm the units before comparing the two BS variants.
BS_Call_HistVol = black_scholes_call_option(
    S = ES_EZ_call['futures.price'],
    K = ES_EZ_call['Strike.Price'],
    T = ES_EZ_call['Time.to.maturity'],
    q = np.zeros(len(ES_EZ_call)),
    r = ES_EZ_call['Risk.Free.Rate'],
    sigma = ES_EZ_call['Historical Vol'],
)
BS_PUT_HistVol = black_scholes_put_option(
    S = ES_EZ_put['futures.price'],
    K = ES_EZ_put['Strike.Price'],
    T = ES_EZ_put['Time.to.maturity'],
    q = np.zeros(len(ES_EZ_put)),
    r = ES_EZ_put['Risk.Free.Rate'],
    sigma = ES_EZ_put['Historical Vol'],
)

print(
    'The total variance explained by BS is {} for the Call Options'.format(
        np.round(r2_score(ES_EZ_call['Settlement'], BS_Call_HistVol), 5)
    )
)
print(
    'The total variance explained by BS is {} for the Put Options'.format(
        np.round(r2_score(ES_EZ_put['Settlement'], BS_PUT_HistVol), 5)
    )
)

The total variance explained by BS is 0.96737 for the Call Options
The total variance explained by BS is 0.84061 for the Put Options


## Results of ANN

In [10]:
# Inputs and target of the call-side ANN: five pricing features -> implied volatility.
ANN_X_Call = ES_EZ_call[['futures.price', 'Strike.Price', 'Time.to.maturity', 'Risk.Free.Rate', 'Historical Vol']]
ANN_y_Call = ES_EZ_call['Implied.Volatility']

# Min-max scale the features; the fitted scaler is kept for later reuse.
scaler_Call_X = MinMaxScaler()
X_Call = pd.DataFrame(scaler_Call_X.fit_transform(ANN_X_Call), columns = ANN_X_Call.columns.values)

# Min-max scale the target as a single column; this scaler is needed later to
# map the network's output back to volatility units.
scaler_Call_y = MinMaxScaler()
y_Call = scaler_Call_y.fit_transform(ANN_y_Call.values.reshape(-1, 1))

In [11]:
# Saved Keras model for the call options.
path = "/Users/gaojinglun/Desktop/RSG/1.5ANN_call_Train_On_ESEZ_withLR0.001"
ANN_call_model = keras.models.load_model(filepath = path)

In [12]:
# Predict the scaled implied volatility, then undo the target scaling so the
# prediction is comparable with the quoted implied volatility.
ANN_call = ANN_call_model.predict(X_Call)
ANN_call = scaler_Call_y.inverse_transform(ANN_call)

print(
    'The total variance of implied volatility explained by ANN is {} for the Call Options'.format(
        np.round(r2_score(ANN_y_Call, ANN_call), 5)
    )
)

The total variance of implied volatility explained by ANN is 0.93197 for the Call Options


In [13]:
# Inputs and target of the put-side ANN: five pricing features -> implied volatility.
ANN_X_Put = ES_EZ_put[['futures.price', 'Strike.Price', 'Time.to.maturity', 'Risk.Free.Rate', 'Historical Vol']]
ANN_y_Put = ES_EZ_put['Implied.Volatility']

# Min-max scale the features; the fitted scaler is kept for later reuse.
scaler_Put_X = MinMaxScaler()
X_Put = pd.DataFrame(scaler_Put_X.fit_transform(ANN_X_Put), columns = ANN_X_Put.columns.values)

# Min-max scale the target as a single column; this scaler is needed later to
# map the network's output back to volatility units.
scaler_Put_y = MinMaxScaler()
y_Put = scaler_Put_y.fit_transform(ANN_y_Put.values.reshape(-1, 1))

In [14]:
# Saved Keras model for the put options.
path2 = "/Users/gaojinglun/Desktop/RSG/1.5ANN_put_Train_On_ESEZ_withLR0.001"
ANN_put_model = keras.models.load_model(filepath = path2)

In [15]:
# Predict the scaled implied volatility, then undo the target scaling so the
# prediction is comparable with the quoted implied volatility.
ANN_put = ANN_put_model.predict(X_Put)
ANN_put = scaler_Put_y.inverse_transform(ANN_put)

print(
    'The total variance of implied volatility explained by ANN is {} for the Put Options'.format(
        np.round(r2_score(ANN_y_Put, ANN_put), 5)
    )
)

The total variance of implied volatility explained by ANN is 0.94338 for the Put Options


In [16]:
# Turn the ANN-predicted implied volatilities into option prices with the
# Black-Scholes formulas and score those prices against the settlements.
# NOTE(review): the printed messages below say "explained by BS" although the
# volatility input comes from the ANN -- confirm the wording is intended.
ANN_Call_Pred = black_scholes_call_option(
    S = ES_EZ_call['futures.price'],
    K = ES_EZ_call['Strike.Price'],
    T = ES_EZ_call['Time.to.maturity'],
    q = np.zeros(len(ES_EZ_call)),
    r = ES_EZ_call['Risk.Free.Rate'],
    sigma = ANN_call.reshape(-1),  # flatten the model output to one value per option
)
ANN_PUT_Pred = black_scholes_put_option(
    S = ES_EZ_put['futures.price'],
    K = ES_EZ_put['Strike.Price'],
    T = ES_EZ_put['Time.to.maturity'],
    q = np.zeros(len(ES_EZ_put)),
    r = ES_EZ_put['Risk.Free.Rate'],
    sigma = ANN_put.reshape(-1),  # flatten the model output to one value per option
)

print(
    'The total variance explained by BS is {} for the Call Options'.format(
        np.round(r2_score(ES_EZ_call['Settlement'], ANN_Call_Pred), 5)
    )
)
print(
    'The total variance explained by BS is {} for the Put Options'.format(
        np.round(r2_score(ES_EZ_put['Settlement'], ANN_PUT_Pred), 5)
    )
)

The total variance explained by BS is 0.99857 for the Call Options
The total variance explained by BS is 0.99543 for the Put Options


### Results of ANN with additional inputs

In [17]:
# ES_EZ_call = ES_EZ_call.set_index(np.arange(ES_EZ_call.shape[0]))
# # Add the moneyness indicator 
# Call_moneyness = np.array(['ITM'] * ES_EZ_call.shape[0])
# Call_moneyness[Call_ATM_idx] = 'ATM'
# Call_moneyness[Call_OTM_idx] = 'OTM'
# ES_EZ_call = pd.concat([ES_EZ_call, pd.get_dummies(Call_moneyness)], axis = 1)
# ES_EZ_call.head()

In [18]:
# ES_EZ_call_X = ES_EZ_call.drop(['Put.Call', 'Implied.Volatility'], axis = 1)
# ES_EZ_call_y = ES_EZ_call['Implied.Volatility']
# # Standardize the features
# scalerX = MinMaxScaler().fit(ES_EZ_call_X)
# ES_EZ_call_X = pd.DataFrame(scalerX.transform(ES_EZ_call_X), columns = ES_EZ_call_X.columns.values) 

# scalery = MinMaxScaler().fit(ES_EZ_call_y.values.reshape(-1, 1))
# ES_EZ_call_y = scalery.transform(ES_EZ_call_y.values.reshape(-1, 1))

In [19]:
# path3 = "/Users/gaojinglun/Desktop/RSG/1.5ANN_call_Train_On_ESEZ_Additional_Inputs_withLR0.001"
# ANN_call_add = keras.models.load_model(path3)

In [20]:
# Call_Results(ANN_call_add.predict())

### Summary

In [21]:
# Observed settlement prices, overall and per moneyness bucket. These are the
# reference values that Call_Results / Put_Results score predictions against.
Call_ALL = ES_EZ_call['Settlement']
Call_ITM = Call_ALL[Call_ITM_idx]
Call_ATM = Call_ALL[Call_ATM_idx]
Call_OTM = Call_ALL[Call_OTM_idx]

Put_ALL = ES_EZ_put['Settlement']
Put_ITM = Put_ALL[Put_ITM_idx]
Put_ATM = Put_ALL[Put_ATM_idx]
Put_OTM = Put_ALL[Put_OTM_idx]

In [22]:
# R^2 per moneyness bucket (ITM, ATM, OTM, overall) for each pricing approach.

# Call options: BS with implied vol, BS with historical vol, ANN-predicted vol.
(BS_Call_ITM, BS_Call_ATM,
 BS_Call_OTM, BS_Call_ALL) = Call_Results(BS_Call_ImpliedVol)
(BS_Call_ITM_hist, BS_Call_ATM_hist,
 BS_Call_OTM_hist, BS_Call_ALL_hist) = Call_Results(BS_Call_HistVol)
(ANN_Call_ITM, ANN_Call_ATM,
 ANN_Call_OTM, ANN_Call_ALL) = Call_Results(ANN_Call_Pred)

# Put options: same three approaches.
(BS_Put_ITM, BS_Put_ATM,
 BS_Put_OTM, BS_Put_ALL) = Put_Results(BS_PUT_ImpliedVol)
(BS_Put_ITM_hist, BS_Put_ATM_hist,
 BS_Put_OTM_hist, BS_Put_ALL_hist) = Put_Results(BS_PUT_HistVol)
(ANN_Put_ITM, ANN_Put_ATM,
 ANN_Put_OTM, ANN_Put_ALL) = Put_Results(ANN_PUT_Pred)

In [23]:
# Summary of R^2 values: one row per pricing approach, one column per
# option type / moneyness bucket.
table = pd.DataFrame(
    data = [
        [BS_Call_ITM, BS_Call_ATM, BS_Call_OTM, BS_Call_ALL,
         BS_Put_ITM, BS_Put_ATM, BS_Put_OTM, BS_Put_ALL],
        [BS_Call_ITM_hist, BS_Call_ATM_hist, BS_Call_OTM_hist, BS_Call_ALL_hist,
         BS_Put_ITM_hist, BS_Put_ATM_hist, BS_Put_OTM_hist, BS_Put_ALL_hist],
        [ANN_Call_ITM, ANN_Call_ATM, ANN_Call_OTM, ANN_Call_ALL,
         ANN_Put_ITM, ANN_Put_ATM, ANN_Put_OTM, ANN_Put_ALL],
    ],
    index = ['BS Implied Vol', 'BS Hist Vol', 'ANN on ES&EZ'],
    columns = ['ITM Call', 'ATM Call', 'OTM Call', 'Overall Call',
               'ITM Put', 'ATM Put', 'OTM Put', 'Overall Put']
)

In [24]:
# Render the summary as plain text, using the column names as the header row.
print(tabulate(table, tablefmt = 'plain', headers = 'keys'))

                  ITM Call    ATM Call    OTM Call    Overall Call    ITM Put    ATM Put    OTM Put    Overall Put
BS Implied Vol     0.99881     0.98681     0.99612         0.9994     0.99748    0.99219    0.99795        0.999
BS Hist Vol        0.97355    -1.49061    -0.4795          0.96737    0.90063   -1.8856    -0.53521        0.84061
ANN on ES&EZ       0.99798     0.94411     0.94344         0.99857    0.99417    0.94642    0.95953        0.99543
