# Realized vs Implied Volatility

In [1]:
# Since the market does not have perfect knowledge about the future, realized and implied volatility can and will be different.
# Therein lies the risk management problem / business or trading opportunity.

In [2]:
import os
import time
import pickle

import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from datetime import timedelta, datetime, date
from thetadata import ThetaClient, OptionReqType, OptionRight, DateRange, DataType, StockReqType

# Get all Expirations for MSFT Options

In [3]:
# First thing we need is all the expiry dates of all contracts on MSFT that ThetaData has available.

In [4]:
# Credentials are read from the environment so they are never saved inside the notebook.
# The variable names THETADATA_USERNAME / THETADATA_PASSWORD are this notebook's own
# convention; when unset, both fall back to '' (the original placeholder value).
your_username = os.environ.get('THETADATA_USERNAME', '')
your_password = os.environ.get('THETADATA_PASSWORD', '')

def get_expirations(root_ticker) -> pd.DataFrame:
    """Fetch the option expirations available for ``root_ticker``.

    A fresh ThetaClient is created with the notebook-level credentials and is
    connected to the Terminal only for the duration of this one request.
    """
    theta = ThetaClient(username=your_username, passwd=your_password, jvm_mem=4, timeout=15)

    # The request has to be issued while the Terminal connection is open
    with theta.connect():
        expiries = theta.get_expirations(root=root_ticker)

    return expiries

Making requests to API for all Contracts by Expiry Dates

In [6]:
# Request every expiration for the MSFT root and show it as the cell output
root_ticker = 'MSFT'
expirations = get_expirations(root_ticker=root_ticker)
expirations

If you require API support, feel free to join our discord server! http://discord.thetadata.us
[09-16-2023 12:59:21] INFO: Starting Theta Terminal v1.0.6 Revision Rev A...
[09-16-2023 12:59:21] INFO: Config dir: C:\Users\hp\ThetaData\ThetaTerminal
[09-16-2023 12:59:23] INFO: [MDDS] Attempting login as 
[09-16-2023 12:59:24] INFO: [MDDS] Disconnected from server: INVALID_CREDENTIALS
[09-16-2023 12:59:25] INFO: [FPSS] Attempting login as 
[09-16-2023 12:59:26] INFO: [FPSS] Disconnected from server: INVALID_CREDENTIALS


TimeoutError: timed out

# Get all Strikes for each MSFT Option Expiry

We will need these later, so I will build up a dictionary and pickle this data for future use.

In [7]:
def get_strikes(root_ticker, expiration_dates) -> dict:
    """Request the strikes of an options root for each given expiration.

    Parameters
    ----------
    root_ticker
        Options root symbol, e.g. 'MSFT'.
    expiration_dates
        Iterable of expiration dates; each one becomes a key of the result.

    Returns
    -------
    dict
        Maps every expiration date to the API response for that expiration
        after conversion with ``pd.to_numeric``.
    """
    # Create a ThetaClient
    client = ThetaClient(username=your_username, passwd=your_password, jvm_mem=4, timeout=15)

    all_strikes = {}

    # Connect to the Terminal once and reuse the connection for every expiration
    with client.connect():
        for exp_date in expiration_dates:
            data = client.get_strikes(root=root_ticker, exp=exp_date)
            all_strikes[exp_date] = pd.to_numeric(data)

    return all_strikes

Making requests to API for Strikes

In [8]:
root_ticker = 'MSFT'

# Download the strikes of every expiration, then cache them on disk
all_strikes = get_strikes(root_ticker, expirations)

with open('MSFT_strikes.pkl', 'wb') as strikes_out:
    pickle.dump(all_strikes, strikes_out)

# Read the cache back (only unpickle files this notebook wrote itself)
with open('MSFT_strikes.pkl', 'rb') as strikes_in:
    all_strikes = pickle.load(strikes_in)

# Inspect the strikes of one sample expiration (entry 360 of `expirations`)
all_strikes[expirations[360]]

# MSFT Underlying ThetaData Request

In [9]:
# We will leverage the API's ability to aggregate quotes into intraday time periods by defining an interval_size.
# We will then compare the historical volatility to the implied volatility for every trading day, using
# quotes in the underlying and in ATM options taken in the afternoon (14:00).

In [13]:
def get_hist_stock(root_ticker, trading_days, interval_size, snapshot_index=4) -> dict:
    """Request historical quotes for an underlying and keep one price per day.

    Parameters
    ----------
    root_ticker
        Symbol of the underlying, e.g. 'MSFT'.
    trading_days
        Iterable of dates; each date is requested as a single-day ``DateRange``.
    interval_size
        Aggregation interval forwarded to the API
        (presumably milliseconds - TODO confirm against the ThetaData docs).
    snapshot_index : int, default 4
        Label of the intraday interval whose weighted mid price is stored for the day.

    Returns
    -------
    dict
        Maps each trading day to the weighted mid price at ``snapshot_index``,
        or to ``np.nan`` when the request or the lookup failed.
    """
    # Create a ThetaClient
    client = ThetaClient(username=your_username, passwd=your_password, jvm_mem=4, timeout=15)

    underlying = {}

    # Connect to the Terminal
    with client.connect():

        for tdate in trading_days:

            try:
                quotes = client.get_hist_stock(
                    req=StockReqType.QUOTE,
                    root=root_ticker,
                    date_range=DateRange(tdate, tdate),
                    interval_size=interval_size
                )

                # One weighted mid price per aggregated interval of the day
                mid_prices = quotes.apply(weighted_mid_price, axis=1)

                underlying[tdate] = mid_prices[snapshot_index]

            except Exception:
                # Best effort: a day without usable data is recorded as NaN.
                # Catching Exception instead of using a bare `except:` keeps
                # Ctrl-C (KeyboardInterrupt) able to stop this long download loop.
                underlying[tdate] = np.nan

    return underlying

Calculate Weighted Mid Price (Micro-Price)

In [14]:
# Calculate the weighted mid price (micro-price) for each row within our quotes dataframe.

In [15]:
def weighted_mid_price(row):
    """Return the size-weighted mid price (micro-price) of one quote row.

    The bid price is weighted by the ask size and the ask price by the bid size:

        (bid * ask_size + ask * bid_size) / (bid_size + ask_size)

    so the result leans towards the side of the book with less resting size.

    Parameters
    ----------
    row
        Mapping-like quote indexed by ``DataType.BID``, ``DataType.ASK``,
        ``DataType.BID_SIZE`` and ``DataType.ASK_SIZE``.

    Returns
    -------
    float
        The micro-price, or ``np.nan`` when a field is missing or unusable or
        when the combined size is not positive.
    """
    try:
        ask_size = row[DataType.ASK_SIZE]
        bid_size = row[DataType.BID_SIZE]
        total_size = ask_size + bid_size

        # No resting size (or a NaN size): the weights are undefined
        if not total_size > 0:
            return np.nan

        bid_weight = ask_size / total_size
        return row[DataType.BID] * bid_weight + row[DataType.ASK] * (1 - bid_weight)
    except (KeyError, IndexError, TypeError, ValueError, ZeroDivisionError):
        # Missing or non-numeric quote fields yield NaN instead of aborting the apply()
        return np.nan

Making requests to API for Underlying

In [None]:
root_ticker = 'MSFT'

# Every business day from 1 June 2012 to 14 November 2022
trading_days = pd.date_range(
    start=datetime(2012,6,1),
    end=datetime(2022,11,14),
    freq='B',
)
interval_size = 60*60000

# Download one price per trading day, then cache the result on disk
underlying = get_hist_stock(root_ticker, trading_days, interval_size)

with open('underlying.pkl', 'wb') as cache_file:
    pickle.dump(underlying, cache_file)

# Volatility over 30d window (~21 trading days)

In [None]:
with open('underlying.pkl', 'rb') as f:
    # NOTE: pickle.load can execute arbitrary code - only load caches this notebook produced.
    underlying = pickle.load(f)

# One row per trading day indexed by date; days stored as NaN are dropped
spot = (
    pd.DataFrame(underlying.items(), columns=['trade_date', 'price'])
    .set_index('trade_date')
    .dropna()
)

# Daily log returns, computed on the price Series so the rolling result below
# is a Series assigned to a column (not a one-column DataFrame)
log_returns = np.log(spot['price'] / spot['price'].shift(1)).dropna()

TRADING_DAYS = 21         # rolling window length (~30 calendar days)
ANNUALISATION_DAYS = 252  # factor used to annualise the daily standard deviation
spot['vol'] = log_returns.rolling(window=TRADING_DAYS).std() * np.sqrt(ANNUALISATION_DAYS)
spot.tail()

fig,ax = plt.subplots(figsize=(12,4))
ax.plot(spot['price'], color='tab:blue')
ax2=ax.twinx()
ax2.plot(spot['vol']*100, color='tab:red')

ax.set_title("Stock price vs rolling realized volatility")
# set x-axis label
ax.set_xlabel("year", fontsize = 14)
# set y-axis label
ax.set_ylabel("Stock Price (USD $)",
              color="tab:blue",
              fontsize=14)

ax2.set_ylabel("Volatility (%)",color="tab:red",fontsize=14)
plt.show()

# Market-Makers are not forced to show Quotes on all options!

In [None]:
trading_days = pd.date_range(start=datetime(2012,6,1), end=datetime(2022,11,14), freq='B')

# Candidate monthly expiries: the third Friday of each month ...
contracts1 = pd.date_range(start=datetime(2012,6,1), end=datetime(2024,12,31), freq='WOM-3FRI')
# ... and the Saturday that follows each third Friday
contracts2 = pd.date_range(start=datetime(2012,6,1), end=datetime(2022,12,31), freq='WOM-3FRI') + timedelta(days=1)
# Both sets combined into a single pandas index
contracts = contracts1.append(contracts2)

# Keep only the ThetaData expiries that fall on a candidate date, as pandas datetimes
mth_expirations = pd.to_datetime(
    pd.Series([exp for exp in expirations if exp in contracts])
)

print(
    'Number of possible monthly contracts', len(contracts),
    'compared to total avail', len(mth_expirations),
    'compared to total no. options avail (incl. quarterly + weekly)', len(expirations),
)

# Days to Expiry (DTE)

In [None]:
# Find the monthly contracts whose days to expiry are closest to 1, 2, 3 and 4 months (30, 60, 90 and 120 days)

In [None]:
trading_days = pd.date_range(start=datetime(2012,6,1), end=datetime(2022,11,14), freq='B')

# Target days-to-expiry for the four tenors
DTE = [30,60,90,120]

# trade date -> positions (within mth_expirations) of the expiry closest to each target
contracts = {}
for trade_date in trading_days:
    days = [delta.days for delta in mth_expirations - trade_date]
    # Ties on distance resolve to the lowest position, as with min() over (distance, position) pairs
    contracts[trade_date] = [
        min(range(len(days)), key=lambda pos: (abs(days[pos] - dte), pos))
        for dte in DTE
    ]

# Implied volatility requests

# Make the request

In [None]:
# Make the request
def implied_vol(root_ticker, trading_days, interval_size=0, opt_type=OptionRight.CALL) -> dict:
    """Request the implied volatility of the closest-to-ATM option for four tenors per day.

    Besides its arguments, the function reads these notebook-level objects:
    ``contracts`` (trade date -> positions in ``mth_expirations``),
    ``mth_expirations``, ``all_strikes`` (expiry -> strikes) and
    ``underlying`` (trade date -> spot price).

    Parameters
    ----------
    root_ticker
        Options root symbol, e.g. 'MSFT'.
    trading_days
        Sized iterable of trade dates; each is requested as a single-day ``DateRange``.
    interval_size
        Aggregation interval forwarded to the API.
    opt_type
        ``OptionRight`` of the contracts to request.

    Returns
    -------
    dict
        ``{trade_date: {'1mth': iv, '2mth': iv, '3mth': iv, '4mth': iv}}``;
        a value is ``np.nan`` when the spot price is missing or the request failed.
    """
    # Create a ThetaClient
    client = ThetaClient(username=your_username, passwd=your_password, jvm_mem=4, timeout=15)

    # Store all iv in datas dictionary
    datas = {}
    DTE = ['1mth','2mth','3mth','4mth']
    total_days = len(trading_days)

    # Connect to the Terminal
    with client.connect():

        for ind, trade_date in enumerate(trading_days):

            print('*'*100, '\nSTART:' ,trade_date, ind+1, '/', total_days ,'\n','*'*100)

            # Get the expiry dates for specific contracts on particular trade date
            exp_dates = mth_expirations[contracts[trade_date]]
            datas[trade_date] = {}
            spot_price = underlying[trade_date]

            # For each expiry we want to get closest ATM iv
            for exp_ind, exp_date in enumerate(exp_dates):

                # Without a spot price "closest to ATM" is undefined: record NaN
                # rather than requesting an arbitrary strike.
                if pd.isna(spot_price):
                    datas[trade_date][DTE[exp_ind]] = np.nan
                    continue

                # Distance of every strike of this expiry from the spot price
                diff_strike = [delta for delta in all_strikes[exp_date] - spot_price]
                # Position of the smallest absolute distance (lowest position on ties)
                index_strike = min((abs(Kdiff), i) for i, Kdiff in enumerate(diff_strike))[1]
                # Return closest ATM strike
                strike = all_strikes[exp_date][index_strike]

                try:
                    # Attempt to request historical options implied volatility
                    data = client.get_hist_option(
                        req=OptionReqType.IMPLIED_VOLATILITY,
                        root=root_ticker,
                        exp=exp_date,
                        strike=strike,
                        right=opt_type,
                        date_range=DateRange(trade_date, trade_date),
                        progress_bar=False,
                        interval_size=interval_size
                    )

                    # Store the value found at row label 4 of the response
                    datas[trade_date][DTE[exp_ind]] = data.loc[4,DataType.IMPLIED_VOL]

                except Exception:
                    # If unavailable, store np.nan. Exception (not a bare except)
                    # so that Ctrl-C can still interrupt the long request loop.
                    datas[trade_date][DTE[exp_ind]] = np.nan

    return datas

Making requests to API for IV

In [None]:
start_all = time.time()

# Calls first; the result is pickled as soon as the download completes
datas_call = implied_vol(root_ticker, trading_days, interval_size=60*60000, opt_type=OptionRight.CALL)

with open('datas_mth_calls.pkl', 'wb') as calls_out:
    pickle.dump(datas_call, calls_out)

# Then the puts, cached the same way
datas_put = implied_vol(root_ticker, trading_days, interval_size=60*60000, opt_type=OptionRight.PUT)

with open('datas_mth_puts.pkl', 'wb') as puts_out:
    pickle.dump(datas_put, puts_out)

end_all = time.time()
elapsed = end_all - start_all
print('*'*100, '  TOTAL time taken {:.2f} s'.format(elapsed), '*'*100)

To demonstrate what that looks like

In [None]:
# One-week sample. NOTE: this rebinds the notebook-level `trading_days`.
trading_days = pd.date_range(start=datetime(2022,11,7),end=datetime(2022,11,11),freq='B')

start_all = time.time()

datas = implied_vol(root_ticker, trading_days, interval_size = 60*60000, opt_type=OptionRight.CALL)

end_all = time.time()
print('*'*100,'  TOTAL time taken {:.2f} s'.format(end_all-start_all),'*'*100)

# One row per trade date, one column per tenor. Built directly from the nested
# dict, so no loop variable named `date` shadows the imported datetime.date.
iv_columns = ['1mth','2mth','3mth','4mth']
df = pd.DataFrame(
    list(datas.values()),
    index=pd.Index(list(datas.keys()), name='trade_date'),
    columns=iv_columns,
)
df

# Visualise IV from Calls

In [None]:
with open('datas_mth_calls.pkl', 'rb') as f:
    # NOTE: pickle.load can execute arbitrary code - only load caches this notebook produced.
    datas_call = pickle.load(f)

# One row per trade date, one column per tenor. Built directly from the nested
# dict, so no loop variable named `date` shadows the imported datetime.date.
iv_columns = ['1mth','2mth','3mth','4mth']
df_calls = pd.DataFrame(
    list(datas_call.values()),
    index=pd.Index(list(datas_call.keys()), name='trade_date'),
    columns=iv_columns,
)
print('Data available', len(df_calls.dropna(how='all')), 'out of', len(df_calls))

# Drop the days on which no tenor has a value
df_calls = df_calls.dropna(how='all')
df_calls.tail()

fig,ax = plt.subplots(figsize=(12,4))
for tenor in iv_columns:
    ax.plot(df_calls[tenor], label=tenor)
ax.set_title('IV Calls by tenor')
ax.set_xlabel("year", fontsize=14)
ax.set_ylabel("Implied volatility", fontsize=14)
ax.legend()
plt.show()

# What's happening to the 1mth+ options series?

In [None]:
fig, ax = plt.subplots(figsize=(12, 4))

# Left axis: the stock price
ax.plot(spot['price'], color='tab:blue', label='Stock')
ax.set_xlabel("year", fontsize=14)
ax.set_ylabel("Stock Price (USD $)", color="tab:blue", fontsize=14)

# Right axis: realized vs 1-month implied volatility, both in percent
ax2 = ax.twinx()
ax2.plot(spot['vol']*100, color='tab:red', label='rv_30')
ax2.plot(df_calls['1mth']*100, color='tab:green', label='iv_30')
ax2.set_ylabel("Volatility (%)", color="tab:red", fontsize=14)

fig.legend()
plt.show()

# Is this a fair comparison?

In [None]:
# Move the realized volatility 21 rows earlier so each date carries the value
# originally stored 21 rows later
spot['vol_shift'] = spot['vol'].shift(-21)

fig, ax = plt.subplots(figsize=(12, 4))

# Left axis: the stock price
ax.plot(spot['price'], color='tab:blue', label='Stock')
ax.set_xlabel("year", fontsize=14)
ax.set_ylabel("Stock Price (USD $)", color="tab:blue", fontsize=14)

# Right axis: shifted realized vs 1-month implied volatility, both in percent
ax2 = ax.twinx()
plt.title('IV Calls vs RV shifted')
ax2.plot(spot['vol_shift']*100, color='tab:red', label='rv_30')
ax2.plot(df_calls['1mth']*100, color='tab:green', label='iv_30')
ax2.set_ylabel("Volatility (%)", color="tab:red", fontsize=14)

fig.legend()
plt.show()

# Now let’s check the puts data

In [None]:
with open('datas_mth_puts.pkl', 'rb') as f:
    # NOTE: pickle.load can execute arbitrary code - only load caches this notebook produced.
    datas_put = pickle.load(f)

# One row per trade date, one column per tenor. Built directly from the nested
# dict, so no loop variable named `date` shadows the imported datetime.date.
iv_columns = ['1mth','2mth','3mth','4mth']
df_puts = pd.DataFrame(
    list(datas_put.values()),
    index=pd.Index(list(datas_put.keys()), name='trade_date'),
    columns=iv_columns,
)
print('Data available', len(df_puts.dropna(how='all')), 'out of', len(df_puts))

# Drop the days on which no tenor has a value
df_puts = df_puts.dropna(how='all')
df_puts

fig,ax = plt.subplots(figsize=(12,4))
ax.plot(spot['price'], color='tab:blue')
ax2=ax.twinx()
plt.title('IV Puts vs IV Calls')
ax2.plot(df_puts['1mth'], color='tab:red', label='IV Puts')
ax2.plot(df_calls['1mth'], color='tab:orange', label='IV Calls')
# set x-axis label
ax.set_xlabel("year", fontsize = 14)
# set y-axis label
ax.set_ylabel("Stock Price (USD $)",
              color="tab:blue",
              fontsize=14)

ax2.set_ylabel("Volatility",color="tab:red",fontsize=14)
fig.legend()
plt.show()