# Skeleton Simulation

#### Imports

In [1]:
import sys
import math
import warnings

import psycopg2
import wrds
import gzip

import seaborn as sns
import os
import quandl
import json
import zipfile
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import functools
import requests
import io

import urllib.request
from urllib.error import HTTPError
from html_table_parser.parser import HTMLTableParser
from bs4 import BeautifulSoup
import re

import plotnine as p9
from plotnine import ggplot, scale_x_date, guides, guide_legend, geom_bar, scale_y_continuous, \
    scale_color_identity, geom_line, geom_point, labs, theme_minimal, theme, element_blank, element_text, \
        geom_ribbon, geom_hline, aes, scale_size_manual, scale_color_manual, ggtitle

from datetime import datetime
import datetime

import pandas as pd
import pandas_market_calendars as mcal
from pandas.plotting import autocorrelation_plot
import numpy as np
from numpy import cumsum, log, polyfit, sqrt, std, subtract
import scipy as sp
from scipy.stats import norm
import scipy.stats as stats

from statsmodels.tsa.stattools import coint
from statsmodels.graphics.tsaplots import plot_acf
import statsmodels.api as sm
from statsmodels.tsa.stattools import acf
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

from collections import deque
from bisect import insort, bisect_left
from itertools import islice


### Retrieve data

In [2]:
csv_file_path = 'combinedata.csv'
data = pd.read_csv(csv_file_path)

In [3]:
display(data.head(200))

Unnamed: 0,date,exdate,last_date,cp_flag,strike_price,best_bid,best_offer,volume,open_interest,impl_volatility,delta,gamma,vega,theta,expiry_indicator,close,adj_open,adj_close,adj_volume
0,2018-01-02,2018-01-03,2017-12-28,C,235.0,33.59,33.81,0.0,187.0,,,,,,w,268.77,242.053393,242.893856,86655749.0
1,2018-01-02,2018-01-03,2018-01-02,C,240.0,28.59,28.76,1.0,88.0,,,,,,w,268.77,242.053393,242.893856,86655749.0
2,2018-01-02,2018-01-03,2017-12-27,C,242.5,26.09,26.32,0.0,2.0,,,,,,w,268.77,242.053393,242.893856,86655749.0
3,2018-01-02,2018-01-03,2018-01-02,C,245.0,23.59,23.81,12.0,58.0,,,,,,w,268.77,242.053393,242.893856,86655749.0
4,2018-01-02,2018-01-03,,C,247.5,21.08,21.32,0.0,0.0,,,,,,w,268.77,242.053393,242.893856,86655749.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,2018-01-02,2018-01-05,2018-01-02,P,268.5,0.53,0.55,4726.0,2801.0,0.069933,-0.429563,0.231414,9.563643,-39.13638,w,268.77,242.053393,242.893856,86655749.0
196,2018-01-02,2018-01-05,2018-01-02,P,269.0,0.73,0.76,4684.0,5112.0,0.065758,-0.549924,0.248915,9.635980,-36.64628,w,268.77,242.053393,242.893856,86655749.0
197,2018-01-02,2018-01-05,2018-01-02,P,269.5,1.02,1.07,357.0,1231.0,0.065012,-0.671967,0.231273,8.791567,-32.57904,w,268.77,242.053393,242.893856,86655749.0
198,2018-01-02,2018-01-05,2018-01-02,P,270.0,1.38,1.45,767.0,11745.0,0.065794,-0.775328,0.191777,7.278910,-26.83571,w,268.77,242.053393,242.893856,86655749.0


### Graphs

We will graph Gamma over time, combined gamma over time of the initially ATM call and put, delta over time for individual contracts and some zero-delta pairs - we'll count this as one graph since it is essentially the same thing.

We will also graph the gamma for ATM options (of different expiry) over time; IV for ATM options over time.

We will also graph the volume/open interest of options for a single day, to visualize the volume and open interest for single-day samples within our data.

In [7]:
calls_df = pd.read_csv('calls.csv')
puts_df = pd.read_csv('puts.csv')

In [8]:
calls_df.rename(columns={'date_x': 'date'}, inplace=True)
puts_df.rename(columns={'date_x': 'date'}, inplace=True)

calls_df['date'] = pd.to_datetime(calls_df['date'])
puts_df['date'] = pd.to_datetime(puts_df['date'])

In [12]:
calls_grouped = calls_df.groupby('date')['strike_price'].apply(list).reset_index(name='calls_strike_prices')
puts_grouped = puts_df.groupby('date')['strike_price'].apply(list).reset_index(name='puts_strike_prices')

merged_df = pd.merge(calls_grouped, puts_grouped, on='date')

merged_df['strike_prices_match'] = merged_df.apply(lambda row: set(row['calls_strike_prices']) == set(row['puts_strike_prices']), axis=1)

dates_strike_prices_dont_match = merged_df[merged_df['strike_prices_match'] == False]['date']
print(dates_strike_prices_dont_match)

Series([], Name: date, dtype: datetime64[ns])


In [19]:
print(calls_df.columns)
print(puts_df.columns)

Index(['date', 'exdate', 'last_date_x', 'cp_flag', 'strike_price',
       'best_bid_x', 'best_offer_x', 'volume_x', 'open_interest_x',
       'impl_volatility_x', 'delta_x', 'gamma_x', 'vega_x', 'theta_x',
       'expiry_indicator_x', 'close_x', 'adj_open_x', 'adj_close_x',
       'adj_volume_x', 'TTE_x', 'close_date', 'is_present'],
      dtype='object')
Index(['date', 'exdate', 'last_date_x', 'cp_flag', 'strike_price',
       'best_bid_x', 'best_offer_x', 'volume_x', 'open_interest_x',
       'impl_volatility_x', 'delta_x', 'gamma_x', 'vega_x', 'theta_x',
       'expiry_indicator_x', 'close_x', 'adj_open_x', 'adj_close_x',
       'adj_volume_x', 'TTE_x', 'close_date', 'is_present'],
      dtype='object')


In [20]:
calls_df['date'] = pd.to_datetime(calls_df['date'])
puts_df['date'] = pd.to_datetime(puts_df['date'])
calls_df['exdate'] = pd.to_datetime(calls_df['exdate'])
puts_df['exdate'] = pd.to_datetime(puts_df['exdate'])

calls_grouped = calls_df.groupby('date').apply(lambda x: list(zip(x['strike_price'], x['exdate']))).reset_index(name='calls_data')
puts_grouped = puts_df.groupby('date').apply(lambda x: list(zip(x['strike_price'], x['exdate']))).reset_index(name='puts_data')

merged_df = pd.merge(calls_grouped, puts_grouped, on='date')

merged_df['data_match'] = merged_df.apply(lambda row: set(row['calls_data']) == set(row['puts_data']), axis=1)

dates_data_dont_match = merged_df[merged_df['data_match'] == False]['date']
print(dates_data_dont_match)

Series([], Name: date, dtype: datetime64[ns])


In [16]:
display(calls_df)
display(puts_df)

Unnamed: 0,date,exdate,last_date_x,cp_flag,strike_price,best_bid_x,best_offer_x,volume_x,open_interest_x,impl_volatility_x,...,vega_x,theta_x,expiry_indicator_x,close_x,adj_open_x,adj_close_x,adj_volume_x,TTE_x,close_date,is_present
0,2018-01-02,2018-01-31,2018-01-02,C,269.0,2.11,2.16,477.0,37.0,0.068871,...,30.21236,-15.22501,w,268.77,242.053393,242.893856,86655749.0,29,2018-01-23,True
1,2018-01-03,2018-02-02,2018-01-03,C,270.0,2.65,2.69,289.0,719.0,0.072398,...,30.55966,-15.81073,w,270.47,243.065564,244.430187,90070416.0,30,2018-01-24,True
2,2018-01-04,2018-02-02,2018-01-04,C,272.0,2.13,2.17,769.0,6178.0,0.071179,...,30.54542,-15.79170,w,271.61,245.089905,245.460432,80595402.0,29,2018-01-25,True
3,2018-01-05,2018-02-02,2018-01-05,C,273.0,2.60,2.66,1387.0,1307.0,0.074068,...,29.90136,-16.79527,w,273.42,246.273783,247.096172,83468662.0,28,2018-01-26,False
4,2018-01-08,2018-02-07,2018-01-08,C,275.0,1.93,1.94,924.0,3846.0,0.072545,...,31.12119,-15.69601,w,273.92,246.996762,247.548034,57288979.0,30,2018-01-29,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1293,2023-02-22,2023-03-24,2023-02-22,C,399.0,9.08,9.12,1875.0,344.0,0.202075,...,45.10776,-66.59158,w,398.54,393.496030,392.530807,83574386.0,30,2023-02-28,True
1294,2023-02-23,2023-03-24,2023-02-23,C,401.0,8.79,8.82,936.0,923.0,0.196720,...,44.17986,-66.44084,w,400.66,395.505271,394.618841,95842681.0,29,2023-02-28,True
1295,2023-02-24,2023-03-24,2023-02-24,C,396.0,9.05,9.08,1151.0,225.0,0.201088,...,43.16854,-68.26598,w,396.38,389.457850,390.403375,108144866.0,28,2023-02-28,True
1296,2023-02-27,2023-03-31,2023-02-27,C,398.0,8.92,8.96,976.0,5244.0,0.194675,...,46.44293,-62.65579,m,397.73,393.840753,391.733020,80318244.0,32,2023-02-28,True


Unnamed: 0,date,exdate,last_date_x,cp_flag,strike_price,best_bid_x,best_offer_x,volume_x,open_interest_x,impl_volatility_x,...,vega_x,theta_x,expiry_indicator_x,close_x,adj_open_x,adj_close_x,adj_volume_x,TTE_x,close_date,is_present
0,2018-01-02,2018-01-31,2018-01-02,P,269.0,2.05,2.10,198.0,33.0,0.069577,...,30.11151,-11.46749,w,268.77,242.053393,242.893856,86655749.0,29,2018-01-23,True
1,2018-01-03,2018-02-02,2018-01-03,P,270.0,1.77,1.80,1060.0,330.0,0.069568,...,30.52431,-11.33461,w,270.47,243.065564,244.430187,90070416.0,30,2018-01-24,True
2,2018-01-04,2018-02-02,2018-01-04,P,272.0,2.12,2.15,416.0,304.0,0.068201,...,30.40449,-11.28616,w,271.61,245.089905,245.460432,80595402.0,29,2018-01-25,True
3,2018-01-05,2018-02-02,2018-01-05,P,273.0,1.81,1.85,509.0,149.0,0.071740,...,29.86764,-12.34094,w,273.42,246.273783,247.096172,83468662.0,28,2018-01-26,True
4,2018-01-08,2018-02-07,2018-01-08,P,275.0,2.53,2.56,82.0,269.0,0.067924,...,30.79770,-10.80407,w,273.92,246.996762,247.548034,57288979.0,30,2018-01-29,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1293,2023-02-22,2023-03-24,2023-02-22,P,399.0,9.67,9.71,2678.0,1317.0,0.201626,...,45.30915,-47.38424,w,398.54,393.496030,392.530807,83574386.0,30,2023-02-28,True
1294,2023-02-23,2023-03-24,2023-02-23,P,401.0,8.83,8.86,1233.0,1566.0,0.186139,...,44.77837,-44.24160,w,400.66,395.505271,394.618841,95842681.0,29,2023-02-28,True
1295,2023-02-24,2023-03-24,2023-02-24,P,396.0,8.81,8.84,3156.0,1086.0,0.198990,...,43.53449,-48.60954,w,396.38,389.457850,390.403375,108144866.0,28,2023-02-28,True
1296,2023-02-27,2023-03-31,2023-02-27,P,398.0,9.23,9.26,1568.0,4088.0,0.188579,...,46.60089,-42.60824,m,397.73,393.840753,391.733020,80318244.0,32,2023-02-28,True


In [22]:
calls_df.rename(columns={
    'close_x': 'close',
    'adj_open_x': 'adj_open',
    'adj_close_x': 'adj_close',
    'adj_volume_x': 'adj_volume',
    'TTE_x': 'TTE',
    'last_date_x': 'last_date_c',
    'best_bid_x': 'best_bid_c',
    'best_offer_x': 'best_offer_c',
    'volume_x': 'volume_c',
    'open_interest_x': 'open_interest_c',
    'impl_volatility_x': 'impl_volatility_c',
    'delta_x': 'delta_c',
    'gamma_x': 'gamma_c',
    'vega_x': 'vega_c',
    'theta_x': 'theta_c',
    'expiry_indicator_x': 'expiry_indicator_c',
    'is_present': 'is_present_c'
}, inplace=True)

calls_df = calls_df[['date', 'exdate', 'strike_price', 'close_date', 'close', 'adj_open', 'adj_close', 'adj_volume', 'TTE', 'last_date_c', 'best_bid_c', 'best_offer_c', 'volume_c', 'open_interest_c', 'impl_volatility_c', 'delta_c', 'gamma_c', 'vega_c', 'theta_c', 'expiry_indicator_c', 'is_present_c']]

In [23]:
puts_df.rename(columns={
    'last_date_x': 'last_date_p',
    'best_bid_x': 'best_bid_p',
    'best_offer_x': 'best_offer_p',
    'volume_x': 'volume_p',
    'open_interest_x': 'open_interest_p',
    'impl_volatility_x': 'impl_volatility_p',
    'delta_x': 'delta_p',
    'gamma_x': 'gamma_p',
    'vega_x': 'vega_p',
    'theta_x': 'theta_p',
    'expiry_indicator_x': 'expiry_indicator_p',
    'is_present': 'is_present_p'
}, inplace=True)

puts_df_selected = puts_df[['date', 'last_date_p', 'best_bid_p', 'best_offer_p', 'volume_p', 'open_interest_p', 'impl_volatility_p', 'delta_p', 'gamma_p', 'vega_p', 'theta_p', 'expiry_indicator_p', 'is_present_p']]

In [24]:
option_df = pd.merge(calls_df, puts_df_selected, on='date', how='left')

display(option_df)

Unnamed: 0,date,exdate,strike_price,close_date,close,adj_open,adj_close,adj_volume,TTE,last_date_c,...,best_offer_p,volume_p,open_interest_p,impl_volatility_p,delta_p,gamma_p,vega_p,theta_p,expiry_indicator_p,is_present_p
0,2018-01-02,2018-01-31,269.0,2018-01-23,268.77,242.053393,242.893856,86655749.0,29,2018-01-02,...,2.10,198.0,33.0,0.069577,-0.496183,0.077797,30.11151,-11.46749,w,True
1,2018-01-03,2018-02-02,270.0,2018-01-24,270.47,243.065564,244.430187,90070416.0,30,2018-01-03,...,1.80,1060.0,330.0,0.069568,-0.442522,0.074853,30.52431,-11.33461,w,True
2,2018-01-04,2018-02-02,272.0,2018-01-25,271.61,245.089905,245.460432,80595402.0,29,2018-01-04,...,2.15,416.0,304.0,0.068201,-0.508652,0.078707,30.40449,-11.28616,w,True
3,2018-01-05,2018-02-02,273.0,2018-01-26,273.42,246.273783,247.096172,83468662.0,28,2018-01-05,...,1.85,509.0,149.0,0.071740,-0.447556,0.074347,29.86764,-12.34094,w,True
4,2018-01-08,2018-02-07,275.0,2018-01-29,273.92,246.996762,247.548034,57288979.0,30,2018-01-08,...,2.56,82.0,269.0,0.067924,-0.560465,0.076891,30.79770,-10.80407,w,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1293,2023-02-22,2023-03-24,399.0,2023-02-28,398.54,393.496030,392.530807,83574386.0,30,2023-02-22,...,9.71,2678.0,1317.0,0.201626,-0.506205,0.017387,45.30915,-47.38424,w,True
1294,2023-02-23,2023-03-24,401.0,2023-02-28,400.66,395.505271,394.618841,95842681.0,29,2023-02-23,...,8.86,1233.0,1566.0,0.186139,-0.507514,0.019064,44.77837,-44.24160,w,True
1295,2023-02-24,2023-03-24,396.0,2023-02-28,396.38,389.457850,390.403375,108144866.0,28,2023-02-24,...,8.84,3156.0,1086.0,0.198990,-0.494048,0.018342,43.53449,-48.60954,w,True
1296,2023-02-27,2023-03-31,398.0,2023-02-28,397.73,393.840753,391.733020,80318244.0,32,2023-02-27,...,9.26,1568.0,4088.0,0.188579,-0.505251,0.018253,46.60089,-42.60824,m,True


In [26]:
option_df1 = option_df[(option_df['is_present_c'] == True) & (option_df['is_present_p'] == True)]
display(option_df1)

Unnamed: 0,date,exdate,strike_price,close_date,close,adj_open,adj_close,adj_volume,TTE,last_date_c,...,best_offer_p,volume_p,open_interest_p,impl_volatility_p,delta_p,gamma_p,vega_p,theta_p,expiry_indicator_p,is_present_p
0,2018-01-02,2018-01-31,269.0,2018-01-23,268.77,242.053393,242.893856,86655749.0,29,2018-01-02,...,2.10,198.0,33.0,0.069577,-0.496183,0.077797,30.11151,-11.46749,w,True
1,2018-01-03,2018-02-02,270.0,2018-01-24,270.47,243.065564,244.430187,90070416.0,30,2018-01-03,...,1.80,1060.0,330.0,0.069568,-0.442522,0.074853,30.52431,-11.33461,w,True
2,2018-01-04,2018-02-02,272.0,2018-01-25,271.61,245.089905,245.460432,80595402.0,29,2018-01-04,...,2.15,416.0,304.0,0.068201,-0.508652,0.078707,30.40449,-11.28616,w,True
4,2018-01-08,2018-02-07,275.0,2018-01-29,273.92,246.996762,247.548034,57288979.0,30,2018-01-08,...,2.56,82.0,269.0,0.067924,-0.560465,0.076891,30.79770,-10.80407,w,True
5,2018-01-09,2018-02-07,275.0,2018-01-30,274.54,247.981821,248.108343,57253957.0,29,2018-01-09,...,2.47,453.0,229.0,0.076654,-0.510832,0.069026,30.74049,-12.99855,w,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1293,2023-02-22,2023-03-24,399.0,2023-02-28,398.54,393.496030,392.530807,83574386.0,30,2023-02-22,...,9.71,2678.0,1317.0,0.201626,-0.506205,0.017387,45.30915,-47.38424,w,True
1294,2023-02-23,2023-03-24,401.0,2023-02-28,400.66,395.505271,394.618841,95842681.0,29,2023-02-23,...,8.86,1233.0,1566.0,0.186139,-0.507514,0.019064,44.77837,-44.24160,w,True
1295,2023-02-24,2023-03-24,396.0,2023-02-28,396.38,389.457850,390.403375,108144866.0,28,2023-02-24,...,8.84,3156.0,1086.0,0.198990,-0.494048,0.018342,43.53449,-48.60954,w,True
1296,2023-02-27,2023-03-31,398.0,2023-02-28,397.73,393.840753,391.733020,80318244.0,32,2023-02-27,...,9.26,1568.0,4088.0,0.188579,-0.505251,0.018253,46.60089,-42.60824,m,True


In [28]:
option_df1.columns

Index(['date', 'exdate', 'strike_price', 'close_date', 'close', 'adj_open',
       'adj_close', 'adj_volume', 'TTE', 'last_date_c', 'best_bid_c',
       'best_offer_c', 'volume_c', 'open_interest_c', 'impl_volatility_c',
       'delta_c', 'gamma_c', 'vega_c', 'theta_c', 'expiry_indicator_c',
       'is_present_c', 'last_date_p', 'best_bid_p', 'best_offer_p', 'volume_p',
       'open_interest_p', 'impl_volatility_p', 'delta_p', 'gamma_p', 'vega_p',
       'theta_p', 'expiry_indicator_p', 'is_present_p'],
      dtype='object')

In [27]:
option_df1.to_csv('option_df.csv', index=False)

### Simulation

Pseudocode format for mass simulation of individual date-strike rebalance. Essentially, we will:
1. pull in the calls_df and puts_df
2. for dates in both dataframes where is_present is true for both, perform additional analysis
3. align calls/puts on that day, specify the contract
4. for the contract in the main dataframe, find the period of stuff until the end date, inclusive, FOR THAT STRIKE CONTRACT AND EXP
5. for each day, calculate the delta sum as the sum of the call delta and -put delta
6. then take the opposite of this, that is how many Shares we must hold on that day (+ or -)
7. calculate the shareprice times shares as sharevalue
8. calculate p&L of day-to-day as change in shares times diff in shareprice
9. calculate current midprice of the option strangle, which will also be used to add to the total PL
10. On the end date, close ALL positions

Save each dataframe as an item in a variable () then store it under a same-directory data folder - name each csv as the name

Later we will also simulate:
- integer shares ie. rounded to the nearest
- trading cost as a variable %
- scaling trading algorithms? like how we talked about in class

For now, let's proceed in steps.

In [29]:
simulationdata = {}

In [None]:
data['contractcode'] = data['strike_price'].astype(str) + '_' + data['exdate'].astype(str)
option_df1['contractcode'] = option_df1['strike_price'].astype(str) + '_' + option_df1['exdate'].astype(str)

In [49]:
option_df1

Unnamed: 0,date,exdate,strike_price,close_date,close,adj_open,adj_close,adj_volume,TTE,last_date_c,...,best_offer_p,volume_p,open_interest_p,impl_volatility_p,delta_p,gamma_p,vega_p,theta_p,expiry_indicator_p,is_present_p
0,2018-01-02,2018-01-31,269.0,2018-01-23,268.77,242.053393,242.893856,86655749.0,29,2018-01-02,...,2.10,198.0,33.0,0.069577,-0.496183,0.077797,30.11151,-11.46749,w,True
1,2018-01-03,2018-02-02,270.0,2018-01-24,270.47,243.065564,244.430187,90070416.0,30,2018-01-03,...,1.80,1060.0,330.0,0.069568,-0.442522,0.074853,30.52431,-11.33461,w,True
2,2018-01-04,2018-02-02,272.0,2018-01-25,271.61,245.089905,245.460432,80595402.0,29,2018-01-04,...,2.15,416.0,304.0,0.068201,-0.508652,0.078707,30.40449,-11.28616,w,True
4,2018-01-08,2018-02-07,275.0,2018-01-29,273.92,246.996762,247.548034,57288979.0,30,2018-01-08,...,2.56,82.0,269.0,0.067924,-0.560465,0.076891,30.79770,-10.80407,w,True
5,2018-01-09,2018-02-07,275.0,2018-01-30,274.54,247.981821,248.108343,57253957.0,29,2018-01-09,...,2.47,453.0,229.0,0.076654,-0.510832,0.069026,30.74049,-12.99855,w,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1293,2023-02-22,2023-03-24,399.0,2023-02-28,398.54,393.496030,392.530807,83574386.0,30,2023-02-22,...,9.71,2678.0,1317.0,0.201626,-0.506205,0.017387,45.30915,-47.38424,w,True
1294,2023-02-23,2023-03-24,401.0,2023-02-28,400.66,395.505271,394.618841,95842681.0,29,2023-02-23,...,8.86,1233.0,1566.0,0.186139,-0.507514,0.019064,44.77837,-44.24160,w,True
1295,2023-02-24,2023-03-24,396.0,2023-02-28,396.38,389.457850,390.403375,108144866.0,28,2023-02-24,...,8.84,3156.0,1086.0,0.198990,-0.494048,0.018342,43.53449,-48.60954,w,True
1296,2023-02-27,2023-03-31,398.0,2023-02-28,397.73,393.840753,391.733020,80318244.0,32,2023-02-27,...,9.26,1568.0,4088.0,0.188579,-0.505251,0.018253,46.60089,-42.60824,m,True


In [48]:
data

Unnamed: 0,date,exdate,last_date,cp_flag,strike_price,best_bid,best_offer,volume,open_interest,impl_volatility,delta,gamma,vega,theta,expiry_indicator,close,adj_open,adj_close,adj_volume
0,2018-01-02,2018-01-03,2017-12-28,C,235.0,33.59,33.81,0.0,187.0,,,,,,w,268.77,242.053393,242.893856,86655749.0
1,2018-01-02,2018-01-03,2018-01-02,C,240.0,28.59,28.76,1.0,88.0,,,,,,w,268.77,242.053393,242.893856,86655749.0
2,2018-01-02,2018-01-03,2017-12-27,C,242.5,26.09,26.32,0.0,2.0,,,,,,w,268.77,242.053393,242.893856,86655749.0
3,2018-01-02,2018-01-03,2018-01-02,C,245.0,23.59,23.81,12.0,58.0,,,,,,w,268.77,242.053393,242.893856,86655749.0
4,2018-01-02,2018-01-03,,C,247.5,21.08,21.32,0.0,0.0,,,,,,w,268.77,242.053393,242.893856,86655749.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10448189,2023-02-28,2025-03-21,2022-12-28,P,600.0,200.50,205.50,0.0,0.0,,,,,,,396.26,391.240559,390.285185,96141367.0
10448190,2023-02-28,2025-03-21,,P,605.0,205.50,210.50,0.0,0.0,,,,,,,396.26,391.240559,390.285185,96141367.0
10448191,2023-02-28,2025-03-21,,P,610.0,210.00,215.00,0.0,0.0,,,,,,,396.26,391.240559,390.285185,96141367.0
10448192,2023-02-28,2025-03-21,,P,615.0,215.00,220.00,0.0,0.0,,,,,,,396.26,391.240559,390.285185,96141367.0


In [50]:
option_df1['exdate'] = pd.to_datetime(option_df1['exdate'])
option_df1['close_date'] = pd.to_datetime(option_df1['close_date'])
data['exdate'] = pd.to_datetime(data['exdate'])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [51]:
option_df2 = option_df1.head(1)

In [52]:
# Assuming 'option_df1' and 'data' are your input DataFrames

# Initialize a dictionary to store the DataFrames for each contract
simulationdata = {}

# Iterate through each contract in option_df1
for index, contract in option_df2.iterrows():
    # Generate a list of dates from the contract's 'date' to 'close_date'
    date_range = pd.date_range(start=contract['date'], end=contract['close_date'])

    # Initialize a list to collect data for each date in the date range
    contract_data_list = []

    # Iterate through each date in the date range
    for single_date in date_range:
        # Filter 'data' for the current date where 'exdate' and 'strike_price' match the contract
        daily_contract_data = data[
            (data['date'] == single_date) &
            (data['exdate'] == contract['exdate']) &
            (data['strike_price'] == contract['strike_price'])
        ]

        # Append the filtered data to the contract_data_list
        contract_data_list.append(daily_contract_data)

    # Concatenate all daily data into a single DataFrame for the contract
    contract_df = pd.concat(contract_data_list)

    # Store the DataFrame in simulationdata with the contract's opening date as the key
    simulationdata[contract['date']] = contract_df


In [47]:
contract_df

Unnamed: 0,date,exdate,last_date,cp_flag,strike_price,best_bid,best_offer,volume,open_interest,impl_volatility,delta,gamma,vega,theta,expiry_indicator,close,adj_open,adj_close,adj_volume


In [41]:
columns = [
    'date', 'exdate', 'strike_price', 'close_date', 'close', 'adj_open', 
    'adj_close', 'adj_volume', 'last_date_c', 'best_bid_c', 'best_offer_c', 
    'volume_c', 'open_interest_c', 'impl_volatility_c', 'delta_c', 'gamma_c', 
    'vega_c', 'theta_c', 'expiry_indicator_c', 'last_date_p', 'best_bid_p', 
    'best_offer_p', 'volume_p', 'open_interest_p', 'impl_volatility_p', 
    'delta_p', 'gamma_p', 'vega_p', 'theta_p', 'expiry_indicator_p'
]

simulationdata = {}

for index, row in option_df2.iterrows():
    date_range = pd.date_range(start=row['date'], end=row['close_date'])
    all_rows = []  # List to collect all rows before concatenation

    for single_date in date_range:
        call_data = data[(data['date'] == single_date) & (data['cp_flag'] == 'C') & 
                         (data['exdate'] == row['exdate']) & (data['strike_price'] == row['strike_price'])]
        put_data = data[(data['date'] == single_date) & (data['cp_flag'] == 'P') & 
                        (data['exdate'] == row['exdate']) & (data['strike_price'] == row['strike_price'])]

        if call_data.empty or put_data.empty:
            continue

        data_for_date = data[data['date'] == single_date]
        
        simulation_row = {column: None for column in columns}  # Initialize all columns to None
        simulation_row.update({
            'date': single_date,
            'exdate': row['exdate'],
            'strike_price': row['strike_price'],
            'close_date': row['close_date'],
            'close': data.loc[data['date'] == single_date, 'close'].iloc[0],
            'adj_open': data.loc[data['date'] == single_date, 'adj_open'].iloc[0],
            'adj_close': data.loc[data['date'] == single_date, 'adj_close'].iloc[0],
            'adj_volume': data.loc[data['date'] == single_date, 'adj_volume'].iloc[0],
            'last_date_c': call_data['last_date'].iloc[0] if not call_data.empty else None,
            'best_bid_c': call_data['best_bid'].iloc[0] if not call_data.empty else None,
            'best_offer_c': call_data['best_offer'].iloc[0] if not call_data.empty else None,
            'volume_c': call_data['volume'].iloc[0] if not call_data.empty else None,
            'open_interest_c': call_data['open_interest'].iloc[0] if not call_data.empty else None,
            'impl_volatility_c': call_data['impl_volatility'].iloc[0] if not call_data.empty else None,
            'delta_c': call_data['delta'].iloc[0] if not call_data.empty else None,
            'gamma_c': call_data['gamma'].iloc[0] if not call_data.empty else None,
            'vega_c': call_data['vega'].iloc[0] if not call_data.empty else None,
            'theta_c': call_data['theta'].iloc[0] if not call_data.empty else None,
            'expiry_indicator_c': call_data['expiry_indicator'].iloc[0] if not call_data.empty else None,
            'last_date_p': put_data['last_date'].iloc[0] if not put_data.empty else None,
            'best_bid_p': put_data['best_bid'].iloc[0] if not put_data.empty else None,
            'best_offer_p': put_data['best_offer'].iloc[0] if not put_data.empty else None,
            'volume_p': put_data['volume'].iloc[0] if not put_data.empty else None,
            'open_interest_p': put_data['open_interest'].iloc[0] if not put_data.empty else None,
            'impl_volatility_p': put_data['impl_volatility'].iloc[0] if not put_data.empty else None,
            'delta_p': put_data['delta'].iloc[0] if not put_data.empty else None,
            'gamma_p': put_data['gamma'].iloc[0] if not put_data.empty else None,
            'vega_p': put_data['vega'].iloc[0] if not put_data.empty else None,
            'theta_p': put_data['theta'].iloc[0] if not put_data.empty else None,
            'expiry_indicator_p': put_data['expiry_indicator'].iloc[0] if not put_data.empty else None,
        })

        all_rows.append(simulation_row)

    # Concatenate all rows for the current contract at once
    if all_rows:
        contract_simulation_df = pd.DataFrame(all_rows)
        simulationdata[row['date']] = contract_simulation_df

In [44]:
all_rows

[]

In [32]:
for index, row in option_df1.iterrows():
    date_range = pd.date_range(start=row['date'], end=row['close_date'])
    
    contract_simulation_df = pd.DataFrame(columns=[
        'date', 'exdate', 'strike_price', 'close_date', 'close', 'adj_open', 
        'adj_close', 'adj_volume', 'last_date_c', 'best_bid_c', 'best_offer_c', 
        'volume_c', 'open_interest_c', 'impl_volatility_c', 'delta_c', 'gamma_c', 
        'vega_c', 'theta_c', 'expiry_indicator_c', 'last_date_p', 'best_bid_p', 
        'best_offer_p', 'volume_p', 'open_interest_p', 'impl_volatility_p', 
        'delta_p', 'gamma_p', 'vega_p', 'theta_p', 'expiry_indicator_p'
    ])
    
    for single_date in date_range:
        call_data = data[(data['date'] == single_date) & (data['cp_flag'] == 'C') & 
                         (data['exdate'] == row['exdate']) & (data['strike_price'] == row['strike_price'])]
        
        put_data = data[(data['date'] == single_date) & (data['cp_flag'] == 'P') & 
                        (data['exdate'] == row['exdate']) & (data['strike_price'] == row['strike_price'])]
        
        if call_data.empty or put_data.empty:
            continue
        
        simulation_row = {
            'date': single_date,
            'exdate': row['exdate'],
            'strike_price': row['strike_price'],
            'close_date': row['close_date'],
            'close': data.loc[data['date'] == single_date, 'close'].iloc[0],
            'adj_open': data.loc[data['date'] == single_date, 'adj_open'].iloc[0],
            'adj_close': data.loc[data['date'] == single_date, 'adj_close'].iloc[0],
            'adj_volume': data.loc[data['date'] == single_date, 'adj_volume'].iloc[0],
            'last_date_c': call_data['last_date'].iloc[0] if not call_data.empty else None,
            'best_bid_c': call_data['best_bid'].iloc[0] if not call_data.empty else None,
            'best_offer_c': call_data['best_offer'].iloc[0] if not call_data.empty else None,
            'volume_c': call_data['volume'].iloc[0] if not call_data.empty else None,
            'open_interest_c': call_data['open_interest'].iloc[0] if not call_data.empty else None,
            'impl_volatility_c': call_data['impl_volatility'].iloc[0] if not call_data.empty else None,
            'delta_c': call_data['delta'].iloc[0] if not call_data.empty else None,
            'gamma_c': call_data['gamma'].iloc[0] if not call_data.empty else None,
            'vega_c': call_data['vega'].iloc[0] if not call_data.empty else None,
            'theta_c': call_data['theta'].iloc[0] if not call_data.empty else None,
            'expiry_indicator_c': call_data['expiry_indicator'].iloc[0] if not call_data.empty else None,
            'last_date_p': put_data['last_date'].iloc[0] if not put_data.empty else None,
            'best_bid_p': put_data['best_bid'].iloc[0] if not put_data.empty else None,
            'best_offer_p': put_data['best_offer'].iloc[0] if not put_data.empty else None,
            'volume_p': put_data['volume'].iloc[0] if not put_data.empty else None,
            'open_interest_p': put_data['open_interest'].iloc[0] if not put_data.empty else None,
            'impl_volatility_p': put_data['impl_volatility'].iloc[0] if not put_data.empty else None,
            'delta_p': put_data['delta'].iloc[0] if not put_data.empty else None,
            'gamma_p': put_data['gamma'].iloc[0] if not put_data.empty else None,
            'vega_p': put_data['vega'].iloc[0] if not put_data.empty else None,
            'theta_p': put_data['theta'].iloc[0] if not put_data.empty else None,
            'expiry_indicator_p': put_data['expiry_indicator'].iloc[0] if not put_data.empty else None,
        }
        
        contract_simulation_df = contract_simulation_df.append(simulation_row, ignore_index=True)
    
    simulationdata[row['date']] = contract_simulation_df

KeyboardInterrupt: 

In [39]:
folder_name = 'simdata'
if not os.path.exists(folder_name):
    os.makedirs(folder_name)

In [53]:
for date, df in simulationdata.items():
    date_str = date.strftime('%Y-%m-%d')
    filename = f"{folder_name}/{date_str}.csv"
    df.to_csv(filename, index=False)

Actual strategy simulation will involve calculated IV vs data IV and position sizing metrics/entry metrics to determine trading. We may also experiment with different sizing if time allows.

This simulation code simulates trading with respect to pairs of individual contracts, with an initial delta of as close to 1 as possible. 

The option contracts are opened at varying expiries, and are held until 1 week before expiry. 

Position in SPY is taken daily to rebalance the option delta as the underlying moves, and P&L is tracked from the option premium and SPY rebalances.
- Obtain the zero-delta pairs for the same expiry date at the interval (2 week, 1 month, 2 month), usually somewhat symmetrical about the ATM strike (how many do we want)
- Initial balance of stock for the remaining delta (because the strike is not exactly the current value of SPY, we will have a small amount of stock to delta hedge in the strangle)
- Every day, we will rebalance the position according to delta of the contracts we hold. 
- Save the daily position, and calculate daily PL vectorized.
- We will consider the close as our trading price for now.

Other considerations and parameters are described in ```outline.md```. This simulator does not actually outline the requirements or thresholds for opening a position, rather merely simulates all of the possible individual positions for our time frame.

For finding zero-delta pairs - we will start at the start_date of the simulation. Then we will calculate the pairs that can be evenly matched.



Currently, we need to simulate
Include zipfile: small data sample, pitchbook, papers

In [None]:
class Simulate:
    def __init__(self, data):
        """
        Initialize the simulation with the provided data.
        
        :param data: A DataFrame containing options data.
        """
        self.data = data
        self.positions = None
        self.daily_pl = None

    def find_zero_delta_pairs(self, intervals):
        """
        Identify zero-delta pairs for specified intervals.

        :param intervals: A list of intervals to find zero-delta pairs for.
        """

    def calculate_initial_position(self):
        """
        Calculate the initial stock position needed to balance the delta.
        """

    def rebalance_daily(self):
        """
        Rebalance the stock position daily based on the option contracts' delta.
        """

    def calculate_daily_pl(self):
        """
        Calculate daily profit and loss.
        """

    def run_simulation(self):
        """
        Run the full simulation, executing all necessary steps.
        """

    def save_results(self, filename):
        """
        Save the daily positions and P&L calculations to a file.

        :param filename: The name of the file to save the results to.
        """

simulate = Simulate(data)
simulate.run_simulation()
