# Imports

In [1]:
import os, sys
sys.path.append('..') # Parent directory in path
from time import time, sleep
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)
import scipy.stats as stats
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit
import statsmodels.api as sm

pd.set_option("display.precision", 4)
#import pandas_market_calendars as mcal # NYSE Calendar

import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8*" and
# later releases dropped the old "seaborn" alias, so prefer the new name and
# fall back to the legacy one on older installs.
plt.style.use("seaborn-v0_8" if "seaborn-v0_8" in plt.style.available else "seaborn")
plt.rc("font", size=14)
plt.rc("figure", figsize=(16, 10))
# import matplotlib.pylab as pl
from matplotlib import cm
from functions.data import add_attributes

# Data
### Pick a specific expiration

In [2]:
# Stack the two halves of the SPX implied-vol database into one frame.
all_data = pd.concat(
    [
        pd.read_hdf(os.path.join("..", "data", h5_name))
        for h5_name in ("spx_iv_db_1.h5", "spx_iv_db_2.h5")
    ]
)

# Expiration analysed in the rest of the notebook.
exp_date = "2017-12-15"

# Keep only complete rows for that expiration, re-indexed from 0.
df = (
    all_data.loc[all_data["EXP"].eq(exp_date)]
    .dropna()
    .reset_index(drop=True)
)
df

Unnamed: 0,TS,EXP,STRIKE,TYPE,BID_CLOSE,MID_CLOSE,ASK_CLOSE,VOLUME,OPEN_INT,UNDERLYING_PRICE,DIFF,RANK,BUS_DAYS,CAL_DAYS,RATE_OLS,RATE_OPEN_INT,RATE_VOLUME,RATE_MONEYNESS,F_T_OLS,F_T_OPEN_INT,F_T_VOLUME,F_T_MONEYNESS,RATE,F_T,IV_BID,IV_MID,IV_ASK
0,2017-07-03,2017-12-15,2425,C,74.4,75.350,76.30,11,15111,2429.01,4.01,0,116,165,0.0148,0.0148,0.0152,0.0152,2421.7557,2421.7434,2421.7357,2421.7474,0.0148,2421.7434,0.1167,0.1182,0.1196
1,2017-07-03,2017-12-15,2425,P,77.7,78.600,79.50,25,13822,2429.01,4.01,0,116,165,0.0148,0.0148,0.0152,0.0152,2421.7557,2421.7434,2421.7357,2421.7474,0.0148,2421.7434,0.1168,0.1182,0.1196
2,2017-07-03,2017-12-15,2450,P,87.8,88.750,89.70,0,5926,2429.01,20.99,1,116,165,0.0148,0.0148,0.0152,0.0152,2421.7557,2421.7434,2421.7357,2421.7474,0.0148,2421.7434,0.1114,0.1128,0.1143
3,2017-07-03,2017-12-15,2450,C,59.8,60.700,61.60,7,19022,2429.01,20.99,1,116,165,0.0148,0.0148,0.0152,0.0152,2421.7557,2421.7434,2421.7357,2421.7474,0.0148,2421.7434,0.1115,0.1129,0.1143
4,2017-07-03,2017-12-15,2400,P,68.9,69.800,70.70,501,32564,2429.01,29.01,2,116,165,0.0148,0.0148,0.0152,0.0152,2421.7557,2421.7434,2421.7357,2421.7474,0.0148,2421.7434,0.1223,0.1237,0.1251
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15278,2017-12-14,2017-12-15,2540,P,0.1,0.200,0.30,20,31619,2652.06,112.06,44,1,1,0.2687,-0.0306,0.3629,0.4772,2654.2863,2654.3611,2654.3095,2654.3193,-0.0306,2654.3611,0.2825,0.3077,0.3253
15279,2017-12-14,2017-12-15,2535,P,0.1,0.200,0.30,73,15733,2652.06,117.06,46,1,1,0.2687,-0.0306,0.3629,0.4772,2654.2863,2654.3611,2654.3095,2654.3193,-0.0306,2654.3611,0.2936,0.3197,0.3378
15280,2017-12-14,2017-12-15,2530,P,0.1,0.200,0.30,16,15516,2652.06,122.06,48,1,1,0.2687,-0.0306,0.3629,0.4772,2654.2863,2654.3611,2654.3095,2654.3193,-0.0306,2654.3611,0.3048,0.3316,0.3502
15281,2017-12-14,2017-12-15,2525,P,0.1,0.200,0.30,1147,67725,2652.06,127.06,50,1,1,0.2687,-0.0306,0.3629,0.4772,2654.2863,2654.3611,2654.3095,2654.3193,-0.0306,2654.3611,0.3159,0.3435,0.3627


In [3]:
# ATM quotes are the RANK == 0 rows; their bid/ask vols are attached to every
# quote sharing the same TS and TYPE as IV_BID_ATM / IV_ASK_ATM.
ATM_vols = df.loc[df["RANK"].eq(0)]

df = df.merge(
    ATM_vols[["TS", "TYPE", "IV_BID", "IV_ASK"]],
    how="left",
    on=["TS", "TYPE"],
    suffixes=(None, "_ATM"),
).assign(
    # Moneyness measures (later entries build on the earlier ones).
    MONEYNESS=lambda d: d["UNDERLYING_PRICE"] - d["STRIKE"],
    MONEYNESS_F=lambda d: d["STRIKE"] / d["F_T"],
    LOG_MONEYNESS_F=lambda d: np.log(d["MONEYNESS_F"]),
    LOG_MONEYNESS_F_STANDARD_TIME=lambda d: d["LOG_MONEYNESS_F"] / np.sqrt(d["CAL_DAYS"]),
    # Disabled variant that additionally scales by the mid ATM vol:
    # df["LOG_MONEYNESS_F_STANDARD_STD"] = df["LOG_MONEYNESS_F"] / (np.sqrt(df["CAL_DAYS"])*(df["IV_BID_ATM"]+df["IV_ASK_ATM"])/2)
    # Vol level relative to ATM: absolute difference first, then ratio.
    IV_BID_DIFF=lambda d: d["IV_BID"] - d["IV_BID_ATM"],
    IV_ASK_DIFF=lambda d: d["IV_ASK"] - d["IV_ASK_ATM"],
    IV_BID_RATIO=lambda d: d["IV_BID"] / d["IV_BID_ATM"],
    IV_ASK_RATIO=lambda d: d["IV_ASK"] / d["IV_ASK_ATM"],
)

# Greeks: load both files, then keep complete rows for the chosen expiration,
# excluding rows whose TS equals the expiration date itself.
raw_data = pd.concat(
    [
        pd.read_hdf(os.path.join("..", "data", h5_name))
        for h5_name in ("vol1.h5", "vol2.h5")
    ]
)
raw_data = (
    raw_data.loc[raw_data["EXP"].eq(exp_date) & raw_data["TS"].ne(exp_date)]
    .dropna()
    .reset_index(drop=True)
)

# Fitted parameters: one sheet per expiration, first column used as the index.
fit_data = pd.read_excel(
    os.path.join("..", "processed_data", "iv_parameter.xlsx"),
    sheet_name=exp_date,
    index_col=0,
)

# Features

In [4]:
def avg_greeks(df):
    """Return a copy of ``df`` with the mean of each greek broadcast to every row.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing ``DELTA``, ``GAMMA``, ``THETA`` and ``VEGA`` columns.

    Returns
    -------
    pandas.DataFrame
        ``df`` plus ``DELTA_AVG``, ``GAMMA_AVG``, ``THETA_AVG`` and ``VEGA_AVG``,
        each holding the column mean repeated on every row.

    Notes
    -----
    The input is left untouched. This function is used with ``groupby.apply``,
    where mutating the group that pandas passes in is not supported, so the
    new columns are added to a copy instead.
    """
    greek_means = {
        "DELTA_AVG": df["DELTA"].mean(),
        "GAMMA_AVG": df["GAMMA"].mean(),
        "THETA_AVG": df["THETA"].mean(),
        "VEGA_AVG": df["VEGA"].mean(),
    }
    # assign() returns a new frame; existing columns keep their order and the
    # averages are appended (or overwritten if they already exist).
    return df.assign(**greek_means)

# One row per TS holding the mean of each greek over that date's rows.
# Aggregating directly (rather than broadcasting the mean with groupby.apply
# and then taking head(1)) avoids the pandas >= 2.0 behaviour where apply puts
# "TS" into the index while it is still a column, which makes a second
# groupby("TS") ambiguous. sort=False keeps dates in first-appearance order.
new_data = (
    raw_data.groupby("TS", sort=False)[["DELTA", "GAMMA", "THETA", "VEGA"]]
    .mean()
    .add_suffix("_AVG")
    .reset_index()
    .dropna()
    .reset_index(drop=True)
)

In [5]:
# Left-join the averaged greeks with the fitted parameters on TS.
# fit_data is shifted up by one row first, so each TS is paired with the
# parameters stored in the *following* row of fit_data; rows left without a
# match (or with any missing value) are dropped.
new_data = (
    new_data.merge(
        fit_data.shift(-1).reset_index().rename(columns={"index": "TS"}),
        how="left",
        on="TS",
    )
    .set_index("TS")
    .dropna()
)

In [6]:
# Correlation of the averaged greeks (rows) with the fitted parameters (columns).
(
    new_data
    .corr()
    .loc[
        ["DELTA_AVG", "GAMMA_AVG", "THETA_AVG", "VEGA_AVG"],
        ["a", "b", "c", "d", "e"],
    ]
)

Unnamed: 0,a,b,c,d,e
DELTA_AVG,0.4852,0.4419,-0.7822,0.3235,-0.458
GAMMA_AVG,0.1253,0.4374,-0.5837,0.3996,-0.4479
THETA_AVG,0.3419,-0.2817,-0.264,-0.3778,0.28
VEGA_AVG,-0.634,-0.4505,0.8413,-0.259,0.4693


In [7]:
def volume_weight_avg_greeks(df):
    """Add volume-weighted greek columns to ``df`` in place and return it.

    For each of ``DELTA``, ``GAMMA``, ``THETA`` and ``VEGA`` a ``*_AVG_V``
    column is written holding ``greek * VOLUME / VOLUME.sum()`` row by row.

    NOTE(review): each row holds its own weighted contribution, not the
    aggregated average; summing a ``*_AVG_V`` column gives the volume-weighted
    mean. Presumably a later step does that sum -- confirm against the caller.
    NOTE(review): if ``VOLUME`` sums to zero the division yields NaN/inf.
    """
    volume = df["VOLUME"]
    total_volume = volume.sum()
    source_by_target = {
        "DELTA_AVG_V": "DELTA",
        "GAMMA_AVG_V": "GAMMA",
        "THETA_AVG_V": "THETA",
        "VEGA_AVG_V": "VEGA",
    }
    for target_col, greek_col in source_by_target.items():
        df[target_col] = df[greek_col] * volume / total_volume
    return df