### Import required packages and functions

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from pandas.tseries.offsets import BusinessDay
import pandas_datareader.data as web
import warnings
warnings.filterwarnings("ignore")
from functions import delta_hedging, calendar_to_business_days, create_straddles, process_straddles

### Initialize important dates

In [2]:
start_date = '2007-01-01'
end_date = '2023-09-30'

training_start_date = '2007-01-01'
training_end_date = '2015-12-31'
test_start_date = '2016-01-01'
test_end_date = '2023-08-31'

### Fetch and Process S&P prices data

In [3]:
sp = yf.download('^SPX')

sp.index = pd.to_datetime(sp.index)

# Filter the DataFrame
filtered_sp = sp.loc[start_date:end_date]

# Remove the ticker level from the multi index
filtered_sp.columns = filtered_sp.columns.droplevel(1)
filtered_sp = pd.DataFrame(filtered_sp['Close'], index=filtered_sp.index)
filtered_sp.index = pd.to_datetime(filtered_sp.index).tz_localize(None)

[*********************100%***********************]  1 of 1 completed


### Fetch and Process Fama French Risk free rate data

In [4]:
# Fetch the Fama-French 3-factor data
ff_data = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start_date, end_date)

# Convert period index to datetime
rf_data = ff_data[0]  # Extract the first dataframe (RF)
rf_data.index = rf_data.index.to_timestamp()  

# Select the RF column (risk-free rate)
risk_free_rate = pd.DataFrame(rf_data['RF'])

# Resample to daily frequency, forward-fill missing values (public holidays), and annualize the risk free rate
risk_free_rate = risk_free_rate.resample('D').ffill()
risk_free_rate = risk_free_rate * 12 / 100
risk_free_rate

  ff_data = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start_date, end_date)
  ff_data = web.DataReader('F-F_Research_Data_Factors', 'famafrench', start_date, end_date)


Unnamed: 0_level_0,RF
Date,Unnamed: 1_level_1
2007-01-01,0.0528
2007-01-02,0.0528
2007-01-03,0.0528
2007-01-04,0.0528
2007-01-05,0.0528
...,...
2023-08-28,0.0540
2023-08-29,0.0540
2023-08-30,0.0540
2023-08-31,0.0540


### Reindex data to forward fill prices and risk free for public holidays

In [5]:
market_df = filtered_sp.merge(risk_free_rate, left_index=True, right_index=True)

# Create a new business day index
new_index = pd.bdate_range(start=start_date, end=end_date)

# Create a new DataFrame with the business day index
real_market_df = pd.DataFrame(index=new_index)

# Merge the existing market_df with the new DataFrame
real_market_df = real_market_df.merge(market_df, left_index=True, right_index=True, how='left')

# Forward fill the missing values
real_market_df.ffill(inplace=True)

# Rename the index to 'Date' if necessary
real_market_df.index.name = 'Date'

# Display the updated DataFrame
real_market_df

Unnamed: 0_level_0,Close,RF
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2007-01-01,,
2007-01-02,,
2007-01-03,1416.599976,0.0528
2007-01-04,1418.339966,0.0528
2007-01-05,1409.709961,0.0528
...,...,...
2023-09-25,4515.770020,0.0516
2023-09-26,4515.770020,0.0516
2023-09-27,4515.770020,0.0516
2023-09-28,4515.770020,0.0516


### Read Option data

In [6]:
# Read data from CSV
data = pd.read_csv('filtered_data2.csv')
data.set_index('Date', inplace=True)

### Define training and testing data

In [7]:
data_train = data[training_start_date:training_end_date]
option_df_train = data_train

data_test = data[test_start_date:test_end_date]
option_df_test = data_test

### Compute MidPrice and convert calendar days to business days

In [8]:
# Get midprice for all data
data_train['Midprice'] = (data_train['best_bid'] + data_train['best_offer']) / 2
data_test['Midprice'] = (data_test['best_bid'] + data_test['best_offer']) / 2

# convert calendar to business days
option_df_train['Business_Days_to_Expiration'] = option_df_train.apply(lambda row: calendar_to_business_days(row.name, row['exdate'], row['D to Expiration']), axis=1)
option_df_test['Business_Days_to_Expiration'] = option_df_test.apply(lambda row: calendar_to_business_days(row.name, row['exdate'], row['D to Expiration']), axis=1)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_train['Midprice'] = (data_train['best_bid'] + data_train['best_offer']) / 2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_test['Midprice'] = (data_test['best_bid'] + data_test['best_offer']) / 2
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  option_df_train['Business_Days_to_Expiration']

### Keep only options with 1 month or more to maturity

In [9]:
data_1m_train = data_train[data_train['Business_Days_to_Expiration'] >= 22]
data_1m_train.index = pd.to_datetime(data_1m_train.index)

data_1m_test = data_test[data_test['Business_Days_to_Expiration'] >= 22]
data_1m_test.index = pd.to_datetime(data_1m_test.index)

In [10]:
data_1m_train

Unnamed: 0_level_0,exdate,cp_flag,strike_price,best_bid,best_offer,volume,impl_volatility,delta,gamma,optionid,contract_size,D to Expiration,Close,Moneyness,Midprice,Business_Days_to_Expiration
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2007-01-09,2007-02-17,C,1425.0,14.6,16.2,1566,0.099818,0.455103,0.008711,32304269,100,39,1412.109985,1.009128,15.40,29
2007-01-09,2007-02-17,C,1430.0,12.2,13.8,221,0.098095,0.410637,0.008697,32309398,100,39,1412.109985,1.012669,13.00,29
2007-01-09,2007-02-17,C,1435.0,10.0,11.6,184,0.096213,0.365685,0.008576,32309402,100,39,1412.109985,1.016210,10.80,29
2007-01-09,2007-02-17,C,1440.0,8.3,9.3,220,0.094129,0.320593,0.008341,32303409,100,39,1412.109985,1.019751,8.80,29
2007-01-09,2007-02-17,C,1445.0,6.6,7.6,61,0.092452,0.277198,0.007950,32303408,100,39,1412.109985,1.023291,7.10,29
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2015-12-31,2016-02-05,P,1920.0,12.1,12.5,96,0.211088,-0.165773,0.001837,109720047,100,36,2043.939941,0.939362,12.30,26
2015-12-31,2016-02-05,P,1925.0,12.7,13.1,49,0.209026,-0.173527,0.001910,109720048,100,36,2043.939941,0.941808,12.90,26
2015-12-31,2016-02-05,P,1940.0,14.7,15.1,13,0.202866,-0.199030,0.002143,109720051,100,36,2043.939941,0.949147,14.90,26
2015-12-31,2016-02-05,P,1990.0,24.1,24.5,12,0.182378,-0.312409,0.003022,109720061,100,36,2043.939941,0.973610,24.30,26


### Create Straddles for each dates

In [11]:
# Group the data by date, strike price, and expiration date
groups_train = data_1m_train.groupby(['Date', 'strike_price', 'Business_Days_to_Expiration'])
groups_test = data_1m_test.groupby(['Date', 'strike_price', 'Business_Days_to_Expiration'])

straddles_df_train = create_straddles(groups_train)
straddles_df_test = create_straddles(groups_test)

### If the straddle starts later than 10th of the month, remove it to comply with the requirement that we choose a straddle at the "beginning" of the month

In [12]:
for dates in straddles_df_train.index:
    if dates.day > 10:
        straddles_df_train.drop(dates, inplace=True, errors='ignore')

for dates in straddles_df_test.index:
    if dates.day > 10:
        straddles_df_test.drop(dates, inplace=True, errors='ignore')

In [13]:
straddles_df_test

Unnamed: 0_level_0,Strike_Price,D to Expiration,Call_Midprice,Put_Midprice,Total_Cost,Underlying Price,Call_Delta,Put_Delta,Call_Optionid,Put_Optionid,Moneyness
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2016-01-04,1830.0,29,186.35,10.45,196.80,2012.660034,0.873377,-0.122999,109769466,109769648,0.909244
2016-01-04,1850.0,29,168.30,12.40,180.70,2012.660034,0.851430,-0.144999,109769470,109769652,0.919182
2016-01-04,1860.0,29,159.35,13.60,172.95,2012.660034,0.839430,-0.157899,109769472,109769654,0.924150
2016-01-04,1865.0,29,155.10,14.25,169.35,2012.660034,0.831967,-0.164777,109769473,109769655,0.926634
2016-01-04,1875.0,24,144.75,12.00,156.75,2012.660034,0.839585,-0.156610,109719857,109720038,0.931603
...,...,...,...,...,...,...,...,...,...,...,...
2023-08-09,4510.0,22,53.35,81.05,134.40,4467.709961,0.445052,-0.552319,155952151,155952233,1.009466
2023-08-10,4370.0,26,155.95,40.45,196.40,4468.830078,0.720425,-0.285514,153998536,153998764,0.977885
2023-08-10,4390.0,26,139.60,44.15,183.75,4468.830078,0.691232,-0.312958,153649185,153649192,0.982360
2023-08-10,4405.0,26,129.00,48.45,177.45,4468.830078,0.668037,-0.336566,155270831,155270874,0.985717


### For each month, choose only one straddle, the one with a moneyness closest to 1

In [14]:
first_straddles_monthly_train = process_straddles(straddles_df_train)
first_straddles_monthly_test = process_straddles(straddles_df_test)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_straddles_daily.drop(columns=['YearMonth', 'Moneyness_Diff'], inplace=True)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_straddles_daily.drop(columns=['YearMonth', 'Moneyness_Diff'], inplace=True)


In [15]:
# No transaction cost, training P&L
profit_no_cost_train = delta_hedging(first_straddles_monthly_train, option_df_train, real_market_df, transaction_cost=0)
profit_no_cost_train

  sp_prices = market_df['Close'].reindex(full_calendar).fillna(method='ffill')
  rf_rates = market_df['RF'].reindex(full_calendar).fillna(method='ffill')


Unnamed: 0,Date,Initial Value,Final Value,Hedged_Position,Value
0,2007-01-10,37.15,6.805377,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 37.147...
1,2007-02-07,34.45,-28.375485,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 34.495...
2,2007-04-10,40.70,-31.781291,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 40.737...
3,2007-05-10,47.35,4.728391,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 47.399...
4,2007-07-10,62.15,-1.532786,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 62.186...
...,...,...,...,...,...
73,2015-08-05,57.65,-90.059961,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 57.65 1 2...
74,2015-09-02,99.65,73.469946,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 99.65 1 2...
75,2015-10-07,73.75,-31.179932,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 73.75 1 2...
76,2015-11-02,58.40,3.020951,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 58.400...


In [16]:
# Transaction cost, testing P&L
profit_cost_train = delta_hedging(first_straddles_monthly_train, option_df_train, real_market_df, transaction_cost=0.005)

  sp_prices = market_df['Close'].reindex(full_calendar).fillna(method='ffill')
  rf_rates = market_df['RF'].reindex(full_calendar).fillna(method='ffill')


In [17]:
# No transaction cost, training P&L
profit_no_cost_test = delta_hedging(first_straddles_monthly_test, option_df_test, real_market_df, transaction_cost=0)

In [18]:
# Transaction cost, testing P&L
profit_cost_test = delta_hedging(first_straddles_monthly_test, option_df_test, real_market_df, transaction_cost=0.005)

In [22]:
profit_cost_test

Unnamed: 0,Date,Initial Value,Final Value,Hedged_Position,Value
0,2016-01-06,83.50,8.877189,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 83.458...
1,2016-02-04,95.00,20.208611,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 94.877...
2,2016-03-10,79.65,-13.605451,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 79.406...
3,2016-04-05,72.95,53.093268,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 72.574...
4,2016-05-04,66.45,10.688158,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 66.195...
...,...,...,...,...,...
87,2023-04-06,173.75,145.836832,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 172.2...
88,2023-05-08,141.80,-14.346315,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 140.4...
89,2023-06-08,108.60,0.598060,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 106.4...
90,2023-07-10,131.60,69.322679,Date Hedged_Position 0 2007-01-01 ...,Date Value 0 2007-01-01 129.6...
