In [25]:
import os
import pandas as pd
import datetime as dt

# Load the option snapshot from the project data folder
data_dir = '/mnt/c/Users/Steve/implied_vol_machine_learning'
raw_data = pd.read_csv(os.path.join(data_dir, 'options_20220824.csv'))

# Restrict the analysis to call options, and treat any quote whose ask is
# 99000.0 or higher as bad data to be discarded
is_call = raw_data["Type"] == "call"
ask_is_sane = raw_data["Ask"] < 99000.0
call_data = raw_data.loc[is_call & ask_is_sane].copy()

# Parse the two date columns once; note the source column name " DataDate"
# carries a leading space
expiry_dates = pd.to_datetime(call_data["Expiration"])
quote_dates = pd.to_datetime(call_data[" DataDate"])

# Derived columns used by the later cells
call_data.loc[:, "moneyness"] = call_data["Strike"] / call_data["UnderlyingPrice"]
call_data.loc[:, "implied_vol"] = call_data["IV"]
# Whole days between quote and expiration, expressed as a fraction of a 365-day year
call_data.loc[:, "maturity"] = (expiry_dates - quote_dates).dt.days / 365
call_data.loc[:, "ticker"] = call_data["UnderlyingSymbol"]
call_data.loc[:, "Mid"] = (call_data["Bid"] + call_data["Ask"]) / 2

# Quick sanity summary of the ranges we ended up with
print(f"Quote dates: {call_data[' DataDate'].unique()}")
print(f"Moneyness: min={call_data['moneyness'].min()}, max={call_data['moneyness'].max()}")
print(f"Maturity: min={call_data['maturity'].min()}, max={call_data['maturity'].max()}")

Quote dates: ['08/24/2022 16:00']
Moneyness: min=0.005609846402405502, max=8750.0
Maturity: min=-0.0027397260273972603, max=5.315068493150685


In [26]:
# Build the calibration input: keep only rows with strictly positive maturity
# and moneyness of at most 2.5 (drops deep out-of-the-money calls), and only
# the four columns needed for this exercise.
model_columns = ["ticker", "moneyness", "maturity", "implied_vol"]
has_positive_maturity = call_data["maturity"] > 0.0
within_moneyness_cap = call_data["moneyness"] <= 2.5
model_input_data = call_data.loc[has_positive_maturity & within_moneyness_cap, model_columns].copy()

In [27]:
import QuantLib as ql
import numpy as np

# Calibration objective: total absolute gap between SABR vols and quoted vols
def sabr_vol_mse(model_params, maturity, moneyness, implied_vol):
    """Return the summed absolute error between SABR-model and observed implied vols.

    This must be a plain synchronous function: it is handed to
    ``scipy.optimize.minimize``, which needs a numeric return value on every
    call (a coroutine function would return an un-awaited coroutine instead).

    Note that, despite the ``mse`` in the name, the value returned is the sum
    of absolute differences, not a mean of squared differences.

    Parameters
    ----------
    model_params : sequence of four numbers
        Unpacked as ``(alpha, beta, nu, rho)``.
    maturity : iterable of numbers
        Time to expiry per quote; each value is cast to ``float``.
    moneyness : iterable of numbers
        One value per quote, paired positionally with ``maturity``; each is
        cast to ``float`` and passed as the first argument of
        ``ql.sabrVolatility`` with the second argument fixed at 1.
    implied_vol : array-like
        Observed implied vols, subtracted elementwise from the model vols.

    Returns
    -------
    float
        Sum over all quotes of ``|model_vol - implied_vol|`` (0.0 when there
        are no quotes).
    """
    alpha, beta, nu, rho = model_params
    model_vol = np.array([
        ql.sabrVolatility(float(strike), 1, float(expiry), alpha, beta, nu, rho)
        for expiry, strike in zip(maturity, moneyness)
    ])
    # |x| is what sqrt(x * x) computed, without the intermediate square
    return np.abs(model_vol - implied_vol).sum()


In [35]:
import time
from scipy.optimize import minimize

# Starting guess shared by every ticker, in the order (alpha, beta, nu, rho)
alpha, beta, nu, rho = 0.5, 0.5, 1.0, 0.5
start_values = [alpha, beta, nu, rho]

# Box constraints, in the same order as start_values
bounds = [
    (0.0001, float('inf')),   # alpha
    (0.0001, 1.0),            # beta
    (0.0001, float('inf')),   # nu
    (-0.9999, 0.9999),        # rho
]

# Fitted parameters keyed by ticker, plus the tickers whose fit did not succeed
model_params_by_ticker = {}
error_tickers = []

start = time.time()
for ticker, ticker_data in model_input_data.groupby('ticker'):
    # Extra positional arguments forwarded to the objective after the parameter vector
    objective_args = (
        ticker_data['maturity'],
        ticker_data['moneyness'],
        ticker_data['implied_vol'],
    )
    res = minimize(
        sabr_vol_mse,
        start_values,
        args=objective_args,
        method="Powell",
        bounds=bounds,
        tol=1e-3,
    )
    if not res.success:
        error_tickers.append(ticker)
        continue
    model_params_by_ticker[ticker] = res.x
end = time.time()

# Report elapsed wall-clock seconds, failed tickers, and the number of fitted tickers
print(end-start)
print(error_tickers)
print(len(model_params_by_ticker))

1194.0928888320923
[]
5685


In [47]:
# One row per ticker, one named column per fitted parameter
param_names = {0: 'Alpha', 1: 'Beta', 2: 'Nu', 3: 'Rho'}
params_by_column = pd.DataFrame(model_params_by_ticker)  # tickers as columns, positions 0-3 as rows
models = params_by_column.T.rename(columns=param_names)

# Peek at the first three tickers
print(models.head(3))

# Persist the fitted parameters next to the input data
output_dir = '/mnt/c/Users/Steve/implied_vol_machine_learning'
models.to_csv(os.path.join(output_dir, 'model_parameters_by_ticker.csv'))

         Alpha      Beta        Nu       Rho
A     0.314901  0.651984  0.766485 -0.427394
AA    0.617718  0.021392  0.474265  0.999899
AADI  1.028776  0.373621  0.734218  0.999900
