<h1>Adding benchmarks and merging all predictions</h1>
<p>Be sure to run the LSTM, MLP, and MLP-GARCH models first: this notebook reads their saved prediction files.</p>

In [1]:
import pandas as pd
import numpy as np
from scipy.stats import norm

In [2]:
def lag_features(df, features, seq_length):
    """Add lagged feature columns and a naive previous-price benchmark per option contract.

    Rows are ordered by contract (``Expire_date``, ``Strike``) and, within a
    contract, by descending ``Ttl``. ``shift(k)`` therefore reads the row ``k``
    positions earlier in that ordering.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``Expire_date``, ``Strike``, ``Ttl``, ``Price``,
        ``Quote_date`` and every column listed in ``features``.
    features : list of str
        Columns to lag.
    seq_length : int
        Number of steps per sequence (expected to be >= 1).

    Returns
    -------
    pandas.DataFrame
        A new frame (the input is not modified), sorted by ``Quote_date``, with
        these columns appended:

        * ``Naive`` - ``Price`` of the previous row of the same contract
          (NaN when there is no such row),
        * ``"<feature>-<step>"`` for ``step = seq_length-1 .. 0`` - the feature
          value ``step`` rows earlier,
        * ``Price_last`` - a copy of ``Price``.

        Only rows whose row ``seq_length - 1`` positions earlier belongs to the
        same contract are kept, so every kept row has a full lag window.
    """
    df = df.sort_values(["Expire_date", "Strike", "Ttl"], ascending=[True, True, False])

    def same_contract(offset):
        # True where the row `offset` positions earlier has the same strike and expiry.
        return (df["Strike"] == df["Strike"].shift(offset)) & (
            df["Expire_date"] == df["Expire_date"].shift(offset)
        )

    # Naive benchmark: previous price of the *same* contract. Masking keeps the
    # last price of one contract from leaking into the first row of the next,
    # which a plain shift(1) does whenever seq_length == 1 keeps those rows.
    df["Naive"] = df["Price"].shift(1).where(same_contract(1))

    # Oldest step first so the columns read left-to-right in time order.
    for step in reversed(range(seq_length)):
        for feature in features:
            df[f"{feature}-{step}"] = df[feature].shift(step)

    # Because the frame is sorted by contract, matching the row seq_length-1
    # positions back implies every row in between is the same contract too.
    # .copy() makes the filtered frame independent before adding a column.
    df = df[same_contract(seq_length - 1)].copy()
    #df[["Bid_strike_last", "Ask_strike_last"]] = df[["Bid_strike", "Ask_strike"]]
    #df[["Bid_last", "Ask_last"]] = df[["Bid", "Ask"]]
    df["Price_last"] = df["Price"]
    return df.sort_values(["Quote_date"], ascending=[True])

    
def read_file(file):
    """Load one CSV source into a pandas DataFrame.

    ``file`` is handed straight to ``pandas.read_csv``; whitespace that
    directly follows a delimiter is skipped (``skipinitialspace=True``).
    """
    frame = pd.read_csv(file, skipinitialspace=True)
    return frame


<h3>Merging the LSTM, MLP, and MLP-GARCH predictions</h3>

In [10]:
# Added this to make lag_features work - Sondre
# Base frame: the MLP predictions file, with its generic "Prediction" column
# renamed after the model. Expire_date is derived as Quote_date + Ttl days.
file = "../data/Predictions/2021_predictions_09-30_10-22.csv"
df_options = read_file(file).rename(columns={"Prediction": "MLP"})
quote_dates = pd.to_datetime(df_options["Quote_date"])
days_to_expiry = pd.to_timedelta(df_options["Ttl"], unit="D")
df_options["Expire_date"] = quote_dates + days_to_expiry

# Attach the other models' predictions one file at a time. The inner join keeps
# only (Quote_date, Strike, Ttl) rows that are present in every file.
merge_keys = ["Quote_date", "Strike", "Ttl"]
for model_name, predictions_path in (
    ("MLP-GARCH", "../data/Predictions/2021_predictions_09-30_14-55_GARCH.csv"),
    ("LSTM", "../data/Predictions/2021_predictions_09-26_21-44_LSTM.csv"),
):
    df_merge = read_file(predictions_path)[merge_keys + ["Prediction"]]
    df_options = pd.merge(df_options, df_merge, how="inner", on=merge_keys).rename(
        columns={"Prediction": model_name}
    )

<h3>Adding naive benchmark</h3>

In [11]:
# Lag configuration: every kept row receives `seq_length` lagged copies of each
# column named in `features`.
seq_length = 5
features = ["Underlying_last", "Strike", "Ttl", "Volatility", "R"]
# Not referenced again in the cells visible here; kept so the kernel state is unchanged.
num_features = len(features)
num_outputs = 1

# lag_features also adds the "Naive" previous-price benchmark column.
df_options = lag_features(df_options, features=features, seq_length=seq_length)

<h3>Adding Black Scholes benchmarks</h3>

In [12]:
# Black-Scholes formula for call options
def _norm_cdf(x):
    """Standard normal CDF of ``x``; a pandas Series input stays a Series with the same index."""
    values = norm.cdf(x)
    if isinstance(x, pd.Series):
        return pd.Series(values, index=x.index)
    return values


def d1(S,K,T,r,sigma):
    """Black-Scholes d1 term: (ln(S/K) + (r + sigma^2 / 2) * T) / (sigma * sqrt(T)).

    Accepts scalars, numpy arrays or pandas Series (vectorised, no per-element
    ``apply``). ``T`` and ``r`` are used as given; no unit conversion happens here.
    """
    log_moneyness = np.log(S) - np.log(K)
    drift = (r + sigma**2 / 2) * T
    return (log_moneyness + drift) / (sigma * np.sqrt(T))


def d2(S,K,T,r,sigma):
    """Black-Scholes d2 term: d1 - sigma * sqrt(T). Same input types as ``d1``."""
    return d1(S,K,T,r,sigma) - sigma * np.sqrt(T)


def bs_call(S,K,T,r,sigma):
    """Black-Scholes price of a call option.

    Parameters
    ----------
    S, K : underlying price and strike.
    T : time to maturity in days (divided by 365 here).
    r : interest rate in percent (divided by 100 here).
    sigma : volatility, used as given.

    All arguments may be scalars, numpy arrays or pandas Series; the result has
    the matching type (a Series keeps its index).
    """
    T = T/365
    r = r/100
    # Compute d1 once and derive d2 from it instead of evaluating d1 twice.
    d1_term = d1(S,K,T,r,sigma)
    d2_term = d1_term - sigma * np.sqrt(T)
    discounted_strike = K * np.exp(-r*T)
    return S * _norm_cdf(d1_term) - discounted_strike * _norm_cdf(d2_term)

In [6]:
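# NOTE: this merge is disabled. The next cell still reads df_options["Volatility_GJR_GARCH"],
# so that column must already be present in the MLP predictions file loaded above
# (TODO confirm); otherwise re-enable the lines below.
# df_merge = read_file("../data/processed_data/2019-2021_underlying-strike_only-price.csv")
# df_merge = df_merge.drop_duplicates(subset=["Quote_date"], keep="first")[["Quote_date", "Volatility_GJR_GARCH"]]
# # df_options = pd.merge(df_options, df_merge, how="inner", on="Quote_date")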

In [13]:
# One Black-Scholes call price column per volatility estimate; every other
# input (underlying, strike, Ttl, rate) is shared between the two.
for bs_column, vol_column in (
    ("BS", "Volatility"),
    ("BS-GJR-GARCH", "Volatility_GJR_GARCH"),
):
    df_options[bs_column] = bs_call(
        df_options["Underlying_last"],
        df_options["Strike"],
        df_options["Ttl"],
        df_options["R"],
        df_options[vol_column],
    )

<h3>Saving results</h3>

In [14]:
# Keep only the inputs, the observed price and one column per model/benchmark.
output_columns = [
    "Quote_date", "Underlying_last", "Strike", "Ttl", "Volatility", "R", "Price",
    "Volatility_GJR_GARCH", "Naive", "MLP", "MLP-GARCH", "LSTM", "BS", "BS-GJR-GARCH",
]
# .copy() makes the column subset an independent frame, so the assignment below
# does not raise SettingWithCopyWarning.
df_options = df_options[output_columns].copy()
df_options["Quote_date"] = pd.to_datetime(df_options["Quote_date"], format='%Y-%m-%d')
# Only quotes dated in 2021 are written out.
df_options = df_options[df_options["Quote_date"].dt.year == 2021]

df_options.to_csv('../data/Predictions/all_predictions.csv', encoding='utf-8', index=False)