In [1]:
# Python 3.7
# author: Leonardo Filipe
# website: https://www.leonardofilipe.com
# contact: contact[at]leonardofilipe.com
%matplotlib notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.interpolate import Rbf
from matplotlib import cm
from mpl_toolkits.mplot3d import axes3d

# The seaborn style sheets were renamed to 'seaborn-v0_8*' in newer Matplotlib
# releases; use the original name when it is still shipped, else the new one.
seaborn_style = 'seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8'
plt.style.use(seaborn_style)

# Silence pandas' SettingWithCopyWarning for the rest of the notebook.
pd.options.mode.chained_assignment = None

# Load the option data and parse both timestamp columns as datetimes so the
# later cells can filter on them.
db = pd.read_hdf("optiondata.h5")
for timestamp_column in ('Now', 'Maturity'):
    db[timestamp_column] = pd.to_datetime(db[timestamp_column])

In [2]:
# Keep the rows recorded at one 'Now' timestamp, then the rows of that
# snapshot that share one 'Maturity' timestamp.
N0 = db[db['Now'] == '2018-04-17 14:30:00']
M0 = N0[N0['Maturity'] == '2018-06-15 17:30:00']

# Drop rows whose call implied volatility is below 0.001.
# This replaces the old "overwrite the whole row with 0, then drop all-zero
# rows" approach, which wrote 0 into every column (timestamps included) and
# relied on DataFrame.any(1), a positional axis that pandas >= 2.0 rejects.
# The mask is written as ~(x < 0.001) so rows with a NaN vol are kept, as before.
has_call_iv = ~(M0['CallImpVol'] < 0.001)
M0 = M0.loc[has_call_iv].reset_index(drop=True)

# Moneyness is the absolute strike-to-spot ratio.
M0['Moneyness'] = (M0['Strike'] / M0['Spot']).abs()

In [3]:
# Least-squares quadratic in moneyness, wrapped as a callable polynomial.
smile_coeffs = np.polyfit(M0['Moneyness'], M0['CallImpVol'], deg=2)
p = np.poly1d(smile_coeffs)

# Grid of 68 moneyness values on which the fitted curve is drawn.
xp = np.linspace(start=0.96, stop=1.09, num=68)

# Raw observations as dots, fitted polynomial as a solid line.
plt.figure(figsize=(9, 6))
plt.plot(M0['Moneyness'], M0['CallImpVol'], '.', label='Raw Data')
plt.plot(xp, p(xp), '-', label='First Attempt')
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

Implied volatility is clearly related to the moneyness of the contracts, but that is not the only driver: maturity also plays an important role. As contracts move closer to maturity they gain leverage, and that is also reflected in the implied volatility. This polynomial also seems to exaggerate the implied volatility of the deeper in-the-money and out-of-the-money contracts.

In [4]:
# Interpolate call implied volatility over (moneyness, expiration) with a
# cubic radial basis function; smooth=0 asks for no smoothing.
fit_moneyness = M0['Moneyness']
fit_expiration = M0['Expiration']
splinterp = Rbf(fit_moneyness, fit_expiration, M0['CallImpVol'],
                function='cubic', smooth=0)

# Evaluate the interpolator back on the points it was built from.
rbffit = splinterp(fit_moneyness, fit_expiration)

We will probably need a more robust method to deal with the outliers and to add the time factor: either a multivariate (3D) polynomial fit or a multivariate spline interpolation. I decided to try a radial basis function for the latter, despite not having an appropriate sample to describe the effect of time on implied volatility, since that would require at least one month of option data.

In [5]:
# Testing the model a month later
db2 = pd.read_hdf("moreoptiondata.h5")
for timestamp_column in ('Now', 'Maturity'):
    db2[timestamp_column] = pd.to_datetime(db2[timestamp_column])

# Same maturity as before, taken from the second data file. No 'Now' filter is
# applied here, so every row of db2 with this maturity is kept.
M0 = db2[db2['Maturity'] == '2018-06-15 17:30:00']

# Drop rows whose call implied volatility is below 0.001.
# This replaces the old "overwrite the whole row with 0, then drop all-zero
# rows" approach, which wrote 0 into every column (timestamps included) and
# relied on DataFrame.any(1), a positional axis that pandas >= 2.0 rejects.
# The mask is written as ~(x < 0.001) so rows with a NaN vol are kept, as before.
has_call_iv = ~(M0['CallImpVol'] < 0.001)
M0 = M0.loc[has_call_iv].reset_index(drop=True)

# Moneyness is the absolute strike-to-spot ratio.
M0['Moneyness'] = (M0['Strike'] / M0['Spot']).abs()

In [6]:
# Surface of the quadratic fit `p`, evaluated on the moneyness values currently
# held in M0 and drawn against their expiration values.
fig = plt.figure(figsize=(9,6))
# Figure.gca(projection=...) stopped accepting keyword arguments in newer
# Matplotlib releases; add_subplot creates the 3D axes on old and new versions.
ax = fig.add_subplot(111, projection='3d')
ax.plot_trisurf(M0['Moneyness'], M0['Expiration'],
                p(M0['Moneyness']), cmap=cm.plasma, linewidth=0.1)

ax.set_xlabel('Moneyness')
ax.set_ylabel('Time to Expire')
ax.set_zlabel('Implied Volatility')

# Camera distance and viewing angles (elevation, azimuth).
ax.dist=12
ax.view_init(30, 112.5)
plt.title('First Attempt')
plt.show()

<IPython.core.display.Javascript object>

Not suitable at all. We are going to need a more robust method, or some kind of smoothing on the edges, and we will need to update the model itself over time.

In [7]:
# Surface of the radial-basis interpolator `splinterp`, evaluated on the
# (moneyness, expiration) pairs currently held in M0.
fig = plt.figure(figsize=(9,6))
# Figure.gca(projection=...) stopped accepting keyword arguments in newer
# Matplotlib releases; add_subplot creates the 3D axes on old and new versions.
ax = fig.add_subplot(111, projection='3d')
ax.plot_trisurf(M0['Moneyness'], M0['Expiration'],
                splinterp(M0['Moneyness'], M0['Expiration']), cmap=cm.plasma, linewidth=0.1)

ax.set_xlabel('Moneyness')
ax.set_ylabel('Time to Expire')
ax.set_zlabel('Implied Volatility')

# Camera distance and viewing angles (elevation, azimuth).
ax.dist=12
ax.view_init(30, 112.5)
# The title used to be a copy of the polynomial figure's 'First Attempt';
# this figure shows the RBF interpolation instead.
plt.title('Second Attempt (RBF)')
plt.show()

<IPython.core.display.Javascript object>

The radial basis function is also going to require a model to build the surface from; otherwise it will just replicate the past.

In [None]:
# Tentative objective function nº1
def predictionerror(coeffs=None, previous=None, current=None):
    """Return the prediction error of a linear implied-volatility update, per row.

    For each row the predicted implied volatility is::

        (Moneyness_current / Moneyness_previous - 1) * a
        + (Expiration_current / Expiration_previous - 1) * b
        + CallImpVol_previous * c

    and the error is that prediction minus ``current['CallImpVol']``.

    The earlier version could not be parsed (missing ``:`` after the ``for``
    header) and returned from inside the loop, so it would only ever have
    evaluated the first row. This version evaluates every row at once.

    Parameters
    ----------
    coeffs : sequence of three numbers, optional
        ``(a, b, c)``. When omitted, the globals ``a``, ``b`` and ``c`` are used.
    previous : pandas.DataFrame, optional
        Earlier observations with ``Moneyness``, ``Expiration`` and
        ``CallImpVol`` columns. When omitted, the global ``M00`` is used.
    current : pandas.DataFrame, optional
        Later observations with the same columns. When omitted, the global
        ``M01`` is used.

    Returns
    -------
    pandas.Series
        One error value per row. Rows of ``previous`` and ``current`` are
        paired by index label.

    Raises
    ------
    NameError
        If an argument is omitted and the corresponding global is not defined.
        NOTE(review): ``a``, ``b``, ``c``, ``M00`` and ``M01`` are not defined
        in the cells shown here, so define them or pass the arguments.
    """
    if coeffs is None:
        coeffs = (a, b, c)
    if previous is None:
        previous = M00
    if current is None:
        current = M01

    weight_moneyness, weight_expiration, weight_prev_vol = coeffs

    moneyness_change = current['Moneyness'] / previous['Moneyness'] - 1
    expiration_change = current['Expiration'] / previous['Expiration'] - 1

    predicted_vol = (moneyness_change * weight_moneyness
                     + expiration_change * weight_expiration
                     + previous['CallImpVol'] * weight_prev_vol)
    return predicted_vol - current['CallImpVol']