In [1]:
import pandas as pd
import yfinance as yf
import pandas_datareader.data as web
import pandas_datareader as pdr
import numpy as np

In [2]:
sp = yf.download('^SPX')

[*********************100%***********************]  1 of 1 completed


In [3]:
# Fetch data for SPY (ETF tracking S&P 500)
sp500_etf = yf.Ticker("SPY")

# Get historical dividends and prices
dividends = sp500_etf.dividends
prices = sp500_etf.history(period="max")['Close']

# Create a DataFrame to align data
data_etf = pd.DataFrame({'Dividends': dividends, 'Close_Price': prices})

# Compute dividend yield only on dividend days annualized
data_etf['Dividend_Yield'] = (data_etf['Dividends'] / data_etf['Close_Price'])* 4

# Forward-fill the dividend yield to fill missing days
data_etf['Dividend_Yield'] = data_etf['Dividend_Yield'].ffill()

# Filter the data from 2000 onwards
data_etf = data_etf.loc['2000-01-01':]
data_etf.index = pd.to_datetime(data_etf.index).tz_localize(None)

In [4]:
sp.index = pd.to_datetime(sp.index)

# Filter the DataFrame
start_date = '2000-01-01'
end_date = '2023-08-31'
filtered_sp = sp.loc[start_date:end_date]
# Remove the 'Ticker' level from the MultiIndex
filtered_sp.columns = filtered_sp.columns.droplevel(1)
filtered_sp = pd.DataFrame(filtered_sp['Close'], index=filtered_sp.index)
filtered_sp.index = pd.to_datetime(filtered_sp.index).tz_localize(None)
filtered_sp

Unnamed: 0_level_0,Close
Date,Unnamed: 1_level_1
2000-01-03,1455.219971
2000-01-04,1399.420044
2000-01-05,1402.109985
2000-01-06,1403.449951
2000-01-07,1441.469971
...,...
2023-08-25,4405.709961
2023-08-28,4433.310059
2023-08-29,4497.629883
2023-08-30,4514.870117


In [5]:
data_options = pd.read_csv("data_option_SP.csv")

In [6]:
data_options['date'] = pd.to_datetime(data_options['date'])
data_options['exdate'] = pd.to_datetime(data_options['exdate'])
data_options['D to Expiration'] = data_options['exdate'] - data_options['date']
data_options['D to Expiration'] = data_options['D to Expiration'].dt.days
data_options.drop(columns=['exdate', 'last_date', 'issuer', 'exercise_style', 'forward_price'], inplace=True)
data_options.set_index('date', inplace=True)
data_options['strike_price'] = data_options['strike_price'] / 1000

In [7]:
# Fetch the Fama-French 3-factor data
ff_data = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start_date, end_date)

# Convert PeriodIndex to DatetimeIndex
rf_data = ff_data[0]  # Extract the first dataframe
rf_data.index = rf_data.index.to_timestamp()  # Convert PeriodIndex to Timestamp

# Select the RF column (risk-free rate)
risk_free_rate = pd.DataFrame(rf_data['RF'])

# Resample to daily frequency and forward-fill missing values
risk_free_rate = risk_free_rate.resample('D').ffill()
risk_free_rate = risk_free_rate / 100
risk_free_rate

  ff_data = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start_date, end_date)
  ff_data = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start_date, end_date)


Unnamed: 0_level_0,RF
Date,Unnamed: 1_level_1
2000-01-01,0.0041
2000-01-02,0.0041
2000-01-03,0.0041
2000-01-04,0.0041
2000-01-05,0.0041
...,...
2023-07-28,0.0045
2023-07-29,0.0045
2023-07-30,0.0045
2023-07-31,0.0045


In [8]:
full_data = data_options.merge(filtered_sp, left_index=True, right_index=True)
full_data = full_data.merge(data_etf['Dividend_Yield'], left_index=True, right_index=True)
full_data = full_data.merge(risk_free_rate, left_index=True, right_index=True)
full_data

Unnamed: 0,cp_flag,strike_price,best_bid,best_offer,volume,impl_volatility,delta,gamma,optionid,contract_size,index_flag,D to Expiration,Close,Dividend_Yield,RF
2000-01-03,P,1505.0,55.250,57.250,0,0.212891,-0.734362,0.004767,10000760,100,1,19,1455.219971,0.015255,0.0041
2000-01-03,P,1545.0,89.625,91.625,0,0.233571,-0.858130,0.002975,10007866,100,1,19,1455.219971,0.015255,0.0041
2000-01-03,C,750.0,706.500,708.500,0,,,,10010807,100,1,75,1455.219971,0.015255,0.0041
2000-01-03,C,1450.0,135.750,137.750,0,0.229435,0.608151,0.001353,10011763,100,1,257,1455.219971,0.015255,0.0041
2000-01-03,P,1350.0,40.750,42.750,290,0.252872,-0.256190,0.001296,10016917,100,1,166,1455.219971,0.015255,0.0041
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,P,5700.0,874.400,921.400,0,0.112256,-0.937313,0.000218,155477343,100,1,332,4576.729980,0.015164,0.0045
2023-08-01,P,5800.0,966.100,1013.300,2,0.112683,-0.951903,0.000167,155271050,100,1,332,4576.729980,0.015164,0.0045
2023-08-01,P,5900.0,1058.900,1106.100,0,0.113260,-0.962578,0.000126,155715629,100,1,332,4576.729980,0.015164,0.0045
2023-08-01,P,6000.0,1151.700,1198.100,0,0.107900,-0.975343,0.000077,155271051,100,1,332,4576.729980,0.015164,0.0045


In [9]:
full_data['Forward Price'] = full_data['Close'] * np.exp((full_data['RF'] - full_data['Dividend_Yield']) * (full_data['D to Expiration'] / 365))

In [10]:
full_data['Moneyness'] = full_data['strike_price'] / full_data['Close']

In [11]:
full_data

Unnamed: 0,cp_flag,strike_price,best_bid,best_offer,volume,impl_volatility,delta,gamma,optionid,contract_size,index_flag,D to Expiration,Close,Dividend_Yield,RF,Forward Price,Moneyness
2000-01-03,P,1505.0,55.250,57.250,0,0.212891,-0.734362,0.004767,10000760,100,1,19,1455.219971,0.015255,0.0041,1454.375181,1.034208
2000-01-03,P,1545.0,89.625,91.625,0,0.233571,-0.858130,0.002975,10007866,100,1,19,1455.219971,0.015255,0.0041,1454.375181,1.061695
2000-01-03,C,750.0,706.500,708.500,0,,,,10010807,100,1,75,1455.219971,0.015255,0.0041,1451.888126,0.515386
2000-01-03,C,1450.0,135.750,137.750,0,0.229435,0.608151,0.001353,10011763,100,1,257,1455.219971,0.015255,0.0041,1443.834531,0.996413
2000-01-03,P,1350.0,40.750,42.750,290,0.252872,-0.256190,0.001296,10016917,100,1,166,1455.219971,0.015255,0.0041,1447.855728,0.927695
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,P,5700.0,874.400,921.400,0,0.112256,-0.937313,0.000218,155477343,100,1,332,4576.729980,0.015164,0.0045,4532.550471,1.245431
2023-08-01,P,5800.0,966.100,1013.300,2,0.112683,-0.951903,0.000167,155271050,100,1,332,4576.729980,0.015164,0.0045,4532.550471,1.267280
2023-08-01,P,5900.0,1058.900,1106.100,0,0.113260,-0.962578,0.000126,155715629,100,1,332,4576.729980,0.015164,0.0045,4532.550471,1.289130
2023-08-01,P,6000.0,1151.700,1198.100,0,0.107900,-0.975343,0.000077,155271051,100,1,332,4576.729980,0.015164,0.0045,4532.550471,1.310980


In [12]:
mask_money = (full_data['Moneyness'] > 0.98) & (full_data['Moneyness'] < 1.02)
mask_time = (full_data['D to Expiration'] > 30) & (full_data['D to Expiration'] < 37)
data_atm = full_data[mask_money & mask_time]

In [13]:
data_atm

Unnamed: 0,cp_flag,strike_price,best_bid,best_offer,volume,impl_volatility,delta,gamma,optionid,contract_size,index_flag,D to Expiration,Close,Dividend_Yield,RF,Forward Price,Moneyness
2000-01-14,C,1475.0,29.375,31.375,526,0.173389,0.500135,0.005070,10032934,100,1,36,1465.150024,0.015255,0.0041,1463.538866,1.006723
2000-01-14,C,1465.0,35.250,37.250,5,0.178191,0.549749,0.004894,10136376,100,1,36,1465.150024,0.015255,0.0041,1463.538866,0.999898
2000-01-14,C,1450.0,44.625,46.625,1085,0.183699,0.619357,0.004569,10251188,100,1,36,1465.150024,0.015255,0.0041,1463.538866,0.989660
2000-01-14,C,1470.0,32.250,34.250,3,0.175778,0.525286,0.004991,10389741,100,1,36,1465.150024,0.015255,0.0041,1463.538866,1.003310
2000-01-14,P,1475.0,32.000,33.625,1528,0.175503,-0.499293,0.005009,10895281,100,1,36,1465.150024,0.015255,0.0041,1463.538866,1.006723
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2023-08-01,P,4645.0,86.700,87.200,18,0.110157,-0.625078,0.002580,156033400,100,1,31,4576.729980,0.015164,0.0045,4572.586623,1.014917
2023-08-01,P,4650.0,89.700,90.100,25,0.109649,-0.638380,0.002561,155595808,100,1,31,4576.729980,0.015164,0.0045,4572.586623,1.016009
2023-08-01,P,4655.0,92.700,93.200,0,0.109204,-0.651533,0.002538,156047177,100,1,31,4576.729980,0.015164,0.0045,4572.586623,1.017102
2023-08-01,P,4660.0,95.900,96.300,6,0.108827,-0.664495,0.002511,155661032,100,1,31,4576.729980,0.015164,0.0045,4572.586623,1.018194
