In [35]:
#!pip install pandas_market_calendars
import pandas as pd
from datetime import datetime, timedelta
import plotly.graph_objects as go
import plotly.graph_objs as go
import plotly.offline as pyo
import plotly.express as px
import numpy as np
import os
from scipy.interpolate import griddata
from IPython.display import display, Markdown, HTML

# Option Basics

In [24]:
# Underlying prices at which the option is evaluated: every integer from 0 to 199
stock_prices = [*range(200)]

# Exercise price of the call
strike_price = 100

# Call payoff at expiry: the excess of the price over the strike, never below zero.
# The second series is the same payoff shifted down by 2, the premium used in this illustration.
payoff_line = [price - strike_price if price > strike_price else 0 for price in stock_prices]
premium_line = [payoff - 2 for payoff in payoff_line]

# Solid line: option payoff at each underlying price
payoff_trace = go.Scatter(
    x=stock_prices,
    y=payoff_line,
    name='Option Payoff',
    mode='lines',
)

# Dashed line: payoff less the premium (kept out of the legend; the shaded band carries the label)
premium_trace = go.Scatter(
    x=stock_prices,
    y=premium_line,
    name='Premium',
    mode='lines',
    line={'dash': 'dash'},
    showlegend=False,
)

# Shaded band between the two lines: a closed polygon traced forward along the payoff
# and back along the premium line, with an invisible outline
premium_fill = go.Scatter(
    x=stock_prices + stock_prices[::-1],
    y=payoff_line + premium_line[::-1],
    name='Premium',
    fill='toself',
    fillcolor='rgba(0,100,80,0.2)',
    line={'color': 'rgba(255,255,255,0)'},
)

# Axes are zoomed to prices 80-120 and payoffs -20 to 20; an arrow marks the strike at zero payoff
layout = go.Layout(
    title='Call Option Payoff Diagram',
    xaxis={'title': 'Index Price', 'range': [80, 120]},
    yaxis={'title': 'Payoff on Index Option', 'range': [-20, 20]},
    annotations=[
        {
            'x': strike_price,
            'y': 0,
            'xref': "x",
            'yref': "y",
            'text': "Strike Price",
            'showarrow': True,
            'arrowhead': 4,
            'ax': 0,
            'ay': -20,
        },
    ],
)

# Assemble the figure from the three traces
figure = go.Figure(data=[payoff_trace, premium_trace, premium_fill], layout=layout)

# Render inline, then also export a standalone HTML file
figure.show()
pyo.plot(figure, filename='call.html')

'call.html'

In [25]:
# Underlying prices at which the option is evaluated: every integer from 0 to 199
stock_prices = [*range(200)]

# Exercise price of the put
strike_price = 100

# Put payoff at expiry: the shortfall of the price below the strike, never below zero.
# The second series is the same payoff shifted down by 2, the premium used in this illustration.
payoff_line = [max(0, strike_price - price) for price in stock_prices]
premium_line = [payoff - 2 for payoff in payoff_line]

# Solid line: option payoff at each underlying price
payoff_trace = go.Scatter(
    x=stock_prices,
    y=payoff_line,
    name='Option Payoff',
    mode='lines',
)

# Dashed line: payoff less the premium (kept out of the legend; the shaded band carries the label)
premium_trace = go.Scatter(
    x=stock_prices,
    y=premium_line,
    name='Premium',
    mode='lines',
    line={'dash': 'dash'},
    showlegend=False,
)

# Shaded band between the two lines: a closed polygon traced forward along the payoff
# and back along the premium line, with an invisible outline
premium_fill = go.Scatter(
    x=stock_prices + stock_prices[::-1],
    y=payoff_line + premium_line[::-1],
    name='Premium',
    fill='toself',
    fillcolor='rgba(0,100,80,0.2)',
    line={'color': 'rgba(255,255,255,0)'},
)

# Axes are zoomed to prices 80-120 and payoffs -20 to 20; an arrow marks the strike at zero payoff
layout = go.Layout(
    title='Put Option Payoff Diagram',
    xaxis={'title': 'Index Price', 'range': [80, 120]},
    yaxis={'title': 'Payoff on Index Option', 'range': [-20, 20]},
    annotations=[
        {
            'x': strike_price,
            'y': 0,
            'xref': "x",
            'yref': "y",
            'text': "Strike Price",
            'showarrow': True,
            'arrowhead': 4,
            'ax': 0,
            'ay': -20,
        },
    ],
)

# Assemble the figure from the three traces
figure = go.Figure(data=[payoff_trace, premium_trace, premium_fill], layout=layout)

# Render inline, then also export a standalone HTML file
figure.show()
pyo.plot(figure, filename='put.html')

'put.html'

# Time Decay

In [21]:
# Root folder of the local dataset.
# NOTE(review): absolute, machine-specific path — adjust when running on another machine.
datasetPath = "C:\\Users\\bendi\\Desktop\\dataset"

# Read the historic 30-minute SPX prices, indexed by their parsed 'date' column
path = os.path.join(datasetPath, "spx.xlsx")
spx30minPrice = pd.read_excel(path, sheet_name="spx30min", parse_dates=['date'], index_col='date')

# Collapse to one row per calendar day, keeping the last observation of each day
spxEodPrice = spx30minPrice.resample('D').last()
# Daily last price rounded to the nearest multiple of 50
spxEodPrice['rounded_last_price'] = (spxEodPrice['last_price'] / 50).round() * 50

# One entry per item found in "aggregatePerDay". os.listdir returns names in arbitrary order,
# so sort them: the file list (and anything plotted in file order) is then reproducible.
tradingDaysFolders = sorted(os.listdir(os.path.join(datasetPath, "aggregatePerDay")))

# Each folder is expected to contain a CSV that carries the folder's own name
csvFilePaths = [os.path.join(datasetPath, "aggregatePerDay", folderName, folderName + ".csv")
                for folderName in tradingDaysFolders]

In [22]:
def _quote_date_from_path(file_path):
    """Return the date written after the last underscore of a daily CSV path (YYYY-MM-DD)."""
    return datetime.strptime(file_path.split("_")[-1].split(".csv")[0], '%Y-%m-%d')


# Contract selection: calls expiring on 2023-09-15, observed from 2021-01-01 onwards
option_type = "C"
start = datetime(2021,1,1)
expiration  = datetime(2023,9,15)

# Keep the daily files dated between start and expiration (inclusive) and process them in
# chronological order, so the points that are later joined by a line follow time.
filtered_file_paths = sorted(
    (file_path for file_path in csvFilePaths
     if start <= _quote_date_from_path(file_path) <= expiration),
    key=_quote_date_from_path,
)


x = [] # days_to_expiration
y = [] # midPrice

for path in filtered_file_paths:
    dayData = pd.read_csv(path, parse_dates=['quote_date', 'expiration'])

    # A file without rows has no quote date to read; skip it instead of failing on iloc[0]
    if dayData.empty:
        continue

    # The day's quote date is taken from the first row of the file
    quote_date = dayData.iloc[0].quote_date
    spxclose = spxEodPrice.loc[quote_date, 'rounded_last_price']
    # Follow the strike 100 points above that day's rounded close
    strike = spxclose + 100

    row = dayData[(dayData['strike'] == strike) &
                  (dayData['expiration'] == expiration) &
                  (dayData['option_type'] == option_type)]

    # Days on which that contract is not listed contribute no point
    if not row.empty:
        extracted_values = row.iloc[0]

        days_to_expiration = (expiration - extracted_values.quote_date).days
        # Mid price between the end-of-day bid and ask
        midPrice = (extracted_values.ask_eod + extracted_values.bid_eod) / 2

        x.append(days_to_expiration)
        y.append(midPrice)

In [28]:
# One marker per observed day, joined by a line
trace = go.Scatter(
    x=x,
    y=y,
    mode='markers+lines',
    marker={'size': 8},
)

# The x axis is reversed so that time runs left to right towards expiration
layout = go.Layout(
    title='Options Time Decay Retrieved from our Dataset: S&P 500 Call Options, Expiring September 15th 2023',
    xaxis={'title': 'Days to expiration', 'autorange': 'reversed'},
    yaxis={'title': 'Option Premium (mid price EoD)'},
)

# Assemble the figure
fig = go.Figure(data=[trace], layout=layout)

# Render inline, then also export a standalone HTML file
fig.show()
pyo.plot(fig, filename='time_decay.html')

'time_decay.html'

# Volatility Surface

In [145]:
# Trade-level option records for 2023-08-15, with both timestamp columns parsed as datetimes
path = "C:\\Users\\bendi\\Desktop\\dataset\\UnderlyingOptionsTradesCalcs_2023-08-15\\UnderlyingOptionsTradesCalcs_2023-08-15.csv"
data = pd.read_csv(path, parse_dates=['quote_datetime', 'expiration'])

# Keep rows stamped 09:30 or later (drops pre-trading records) whose contract
# expires on that same day (0DTE)
data = data[
    (data["quote_datetime"] >= datetime(2023, 8, 15, 9, 30))
    & (data["expiration"] == datetime(2023, 8, 15))
]

In [146]:
# Number of rows recorded for every (expiration, strike) pair
count_df = (
    data.groupby(['expiration', 'strike'])
    .size()
    .reset_index(name='count')
)
# Show the five pairs with the most rows
count_df.sort_values('count', ascending=False).head(5)

Unnamed: 0,expiration,strike,count
57,2023-08-15,4440.0,38259
59,2023-08-15,4450.0,35929
61,2023-08-15,4460.0,30499
58,2023-08-15,4445.0,29301
60,2023-08-15,4455.0,27674


In [147]:
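# Optional filter (currently disabled). NOTE(review): this comment used to say strike == 4440, but the condition below keeps strikes >= 4440 — confirm which is intended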
#data = data[data["strike"] >= 4440]

In [148]:
# Tally the rows on each side of the market
nPuts = int((data["option_type"] == "P").sum())
nCalls = int((data["option_type"] == "C").sum())

print(nPuts, nCalls)

207056 193624


In [149]:
# Optional filter (currently disabled): keep only puts
#data = data[data["option_type"] == "P"]

In [150]:
# Size multiplied by price for every trade; summed later as the numerator of the VWAP
data["trade_size_X_trade_price"] = data["trade_size"].mul(data["trade_price"])

In [151]:
# Inspect the filtered trades, now including the trade_size_X_trade_price column
data

Unnamed: 0,underlying_symbol,quote_datetime,sequence_number,root,expiration,strike,option_type,exchange_id,trade_size,trade_price,...,trade_delta,underlying_bid,underlying_ask,number_of_exchanges,{exchange,bid_size,bid,ask_size,ask}[number_of_exchanges],trade_size_X_trade_price
27683,^SPX,2023-08-15 15:58:27.837,2929185488,SPXW,2023-08-15,1200.0,C,5,1,3241.40,...,0.0000,4441.9902,4441.9902,0,,,,,,3241.40
27684,^SPX,2023-08-15 15:58:53.306,2933119216,SPXW,2023-08-15,1200.0,C,5,1,3241.40,...,0.0000,4441.2441,4441.2441,0,,,,,,3241.40
27685,^SPX,2023-08-15 13:59:43.351,2151710499,SPXW,2023-08-15,2400.0,C,5,1,2049.40,...,0.0000,4449.1996,4449.1996,0,,,,,,2049.40
27686,^SPX,2023-08-15 15:51:57.053,2871252797,SPXW,2023-08-15,2800.0,C,5,1,1636.11,...,0.0000,4436.0217,4436.0217,0,,,,,,1636.11
27687,^SPX,2023-08-15 15:52:08.340,2872954117,SPXW,2023-08-15,2800.0,C,5,1,1636.97,...,0.0000,4437.5138,4437.5138,0,,,,,,1636.97
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
442182,^SPX,2023-08-15 15:41:30.560,2780396188,SPXW,2023-08-15,5100.0,P,5,2,667.98,...,-0.9993,4432.0427,4432.0427,0,,,,,,1335.96
442183,^SPX,2023-08-15 15:41:30.560,2780396192,SPXW,2023-08-15,5100.0,P,5,2,667.98,...,-0.9993,4432.0427,4432.0427,0,,,,,,1335.96
442184,^SPX,2023-08-15 15:41:30.560,2780396196,SPXW,2023-08-15,5100.0,P,5,2,667.98,...,-0.9993,4432.0427,4432.0427,0,,,,,,1335.96
442185,^SPX,2023-08-15 15:41:30.560,2780396201,SPXW,2023-08-15,5100.0,P,5,1,667.98,...,-0.9993,4432.0427,4432.0427,0,,,,,,667.98


In [152]:
# Aggregate the trade-level data into one-minute bars

# Index by trade timestamp so the frame can be resampled. The guard keeps this cell re-runnable:
# once 'quote_datetime' is the index it is no longer a column, and a second set_index would raise.
if data.index.name != 'quote_datetime':
    data = data.set_index('quote_datetime')

# Per minute: sum of size x price, sum of size, and the strike of the first row in the minute.
# 'min' is the minute alias; the older 'T' spelling is deprecated in recent pandas.
# NOTE(review): the sums run over every strike and both option types, while 'strike' keeps only
# the first row's strike — confirm that this is the intended representative strike.
df_minute = data.resample('min').agg({'trade_size_X_trade_price': 'sum', 'trade_size': 'sum', 'strike':'first'})

# Bring the timestamp back as a regular column
df_minute = df_minute.reset_index()

# Volume-weighted average price; a minute without trades has zero volume and yields NaN
df_minute["VWAP"] = df_minute["trade_size_X_trade_price"] / df_minute["trade_size"]

# Display the resulting DataFrame
df_minute


Unnamed: 0,quote_datetime,trade_size_X_trade_price,trade_size,strike,VWAP
0,2023-08-15 09:30:00,36858.43,12265,4100.0,3.005172
1,2023-08-15 09:31:00,41960.72,16444,4510.0,2.551734
2,2023-08-15 09:32:00,39390.08,12203,4495.0,3.227901
3,2023-08-15 09:33:00,35143.14,10150,4460.0,3.462378
4,2023-08-15 09:34:00,31955.35,7915,4505.0,4.037315
...,...,...,...,...,...
385,2023-08-15 15:55:00,12004.99,6345,4440.0,1.892039
386,2023-08-15 15:56:00,6191.25,5348,4435.0,1.157676
387,2023-08-15 15:57:00,3679.25,3054,4415.0,1.204731
388,2023-08-15 15:58:00,13875.69,3676,4440.0,3.774671


In [153]:
# One-minute SPX prices for August 15th, read from the workbook's "spx15august1min" sheet
path = "C:\\Users\\bendi\\Desktop\\dataset\\spx.xlsx"
spx1minPrice = pd.read_excel(path, sheet_name="spx15august1min", parse_dates=['date'])
# Move every timestamp six hours earlier.
# NOTE(review): presumably a time-zone alignment with the option timestamps — confirm.
spx1minPrice['date'] = spx1minPrice['date'].sub(pd.Timedelta(hours=6))

In [154]:
# Pair each index price with the option bar of the same minute; minutes missing on either side are dropped
merged = spx1minPrice.merge(df_minute, how='inner', left_on='date', right_on='quote_datetime')

In [155]:
# Moneyness: the bar's strike divided by the index price of the same minute
merged["monyness"] = merged["strike"].div(merged["last_price"])
# Minutes remaining until 16:00 on 2023-08-15
merged['time_to_maturity'] = (datetime(2023, 8, 15, 16) - merged['date']).dt.total_seconds().div(60)
# Order the rows chronologically first, so that the rolling window covers consecutive minutes,
# then take the standard deviation of VWAP over a window of five rows
merged = merged.sort_values('date')
merged['five_min_volatility'] = merged['VWAP'].rolling(5).std()

In [156]:
# Preview the derived columns next to the timestamp and the VWAP they were computed from
merged.loc[:, ["date", "monyness", 'time_to_maturity', 'VWAP', 'five_min_volatility']]

Unnamed: 0,date,monyness,time_to_maturity,VWAP,five_min_volatility
389,2023-08-15 09:30:00,0.916635,390.0,3.005172,
388,2023-08-15 09:31:00,1.008698,389.0,2.551734,
387,2023-08-15 09:32:00,1.004624,388.0,3.227901,
386,2023-08-15 09:33:00,0.997185,387.0,3.462378,
385,2023-08-15 09:34:00,1.007280,386.0,4.037315,0.550621
...,...,...,...,...,...
4,2023-08-15 15:55:00,0.999741,5.0,1.892039,1.375920
3,2023-08-15 15:56:00,0.998824,4.0,1.157676,1.340247
2,2023-08-15 15:57:00,0.994383,3.0,1.204731,0.652187
1,2023-08-15 15:58:00,0.999674,2.0,3.774671,1.113423


In [157]:
# Preview only the three columns that feed the volatility surface
merged.loc[:, ["monyness", 'time_to_maturity', 'five_min_volatility']]

Unnamed: 0,monyness,time_to_maturity,five_min_volatility
389,0.916635,390.0,
388,1.008698,389.0,
387,1.004624,388.0,
386,0.997185,387.0,
385,1.007280,386.0,0.550621
...,...,...,...
4,0.999741,5.0,1.375920
3,0.998824,4.0,1.340247
2,0.994383,3.0,0.652187
1,0.999674,2.0,1.113423


In [174]:
# Drop rows with a missing value in any column (the first rows have no rolling volatility yet),
# so only complete rows are passed to the interpolation
merged = merged.dropna()

# Regular 100 x 100 grid spanning the observed moneyness and time-to-maturity ranges
grid_x, grid_y = np.mgrid[merged['monyness'].min():merged['monyness'].max():100j,
                          merged['time_to_maturity'].min():merged['time_to_maturity'].max():100j]

# Cubic interpolation of the observed volatility onto the grid
grid_z = griddata((merged['monyness'], merged['time_to_maturity']),
                  merged['five_min_volatility'], (grid_x, grid_y), method='cubic')

# Long-format frame of the interpolated surface, one row per grid node
interpolated_data = pd.DataFrame({'monyness': grid_x.flatten(),
                                  'time_to_maturity': grid_y.flatten(),
                                  'five_min_volatility': grid_z.flatten()})


def _volatility_scatter(frame):
    """Build a 3-D marker trace of volatility over moneyness and minutes to maturity."""
    return go.Scatter3d(x=frame['monyness'],
                        y=frame['time_to_maturity'],
                        z=frame['five_min_volatility'],
                        mode='markers',
                        marker=dict(size=8, color=frame['five_min_volatility'], colorscale='Viridis'))


# Figure 1 shows the interpolated grid, figure 2 the observed points
fig1 = go.Figure()
fig1.add_trace(_volatility_scatter(interpolated_data))

fig2 = go.Figure()
fig2.add_trace(_volatility_scatter(merged))

# Both figures share everything except the title
_shared_layout = dict(
    title_x=0.5,
    title_y=0.8,
    height=900,
    scene=dict(xaxis=dict(title='Monyness'),
               yaxis=dict(title='Minutes to Maturity'),
               zaxis=dict(title='Five Minute Volatility'),
               camera=dict(eye=dict(x=-1.5, y=1.5, z=1))),
)
fig1.update_layout(title='Surface Plot', **_shared_layout)
fig2.update_layout(title='Actual Data Aug 15th, 2023', **_shared_layout)

# Combine the two plots into a single HTML file.
# plotly.js is embedded once, with the first figure, and reused by the second. The page no longer
# loads the "plotly-latest" CDN bundle: that URL is no longer updated, and both figures already
# carried their own copy of the library, so the file contained plotly.js three times over.
fig1_html = fig1.to_html(full_html=False, include_plotlyjs=True)
fig2_html = fig2.to_html(full_html=False, include_plotlyjs=False)

combined_html = f"""
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
</head>
<body>

<div style="display: flex; height: 100vh;">
  <div id="plot1" style="width: 50%; height: 50vh;">
    {fig1_html}
  </div>
  <div id="plot2" style="width: 50%; height: 50vh;">
    {fig2_html}
  </div>
</div>

</body>
</html>
"""

# Write as UTF-8 explicitly: the platform default encoding may be unable to represent
# characters contained in the embedded plotly.js bundle
with open("volatility_surface.html", "w", encoding="utf-8") as file:
    file.write(combined_html)


<br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br><br>

In [None]:
# Remove rows whose volatility exceeds 1.3 while more than 83 minutes remain to maturity.
# NOTE(review): both thresholds are hard-coded — presumably chosen by eye to drop outliers; confirm.
merged = merged.loc[~(merged["five_min_volatility"].gt(1.3) & merged["time_to_maturity"].gt(83))]

In [44]:
# Overview of the datasets after processing and after excluding days without 0DTE trades:
# one row per dataset with what it provides and how many observations it holds
dataset_summary = pd.DataFrame(
    {
        "Dataset": [
            "April 2013 to May 2022 EoD data",
            "May 2022 to October 2023 EoD data",
            "August 15th, 2023, 1-minute data",
            "SPX 30-min price data: April 2013 - October 2023",
            "SPX 1-min price data: August 15th, 2023",
        ],
        "Data": [
            "nDTE%, Option Volume",
            "nDTE%, Option Volume",
            "Put/Call Ratio, nDTE%, Option Volume",
            "Price, volatility",
            "Price, volatility",
        ],
        "Number of Observation": [1083, 349, 390, 2640, 390],
    }
)

# Heading and caption, then the table rendered as HTML without its index column
display(Markdown("**Summary of datasets**"))
display(Markdown("Processed and filtered to exclude days without 0DTE trades"))
display(HTML(dataset_summary.to_html(index=False)))

**Summary of datasets**

Processed and filtered to exclude days without 0DTE trades

Dataset,Data,Number of Observation
April 2013 to May 2022 EoD data,"nDTE%, Option Volume",1083
May 2022 to October 2023 EoD data,"nDTE%, Option Volume",349
"August 15th, 2023, 1-minute data","Put/Call Ratio, nDTE%, Option Volume",390
SPX 30-min price data: April 2013 - October 2023,"Price, volatility",2640
"SPX 1-min price data: August 15th, 2023","Price, volatility",390
