In [None]:
import pandas as pd
import numpy as np
import math
import datetime
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import scipy
from scipy import optimize
from IPython.display import display, Math, Latex

# Parent folder
import pathlib
from pathlib import Path  
# `pathlib.Path()` is the relative path '.', so `resolve()` yields the absolute current working
# directory of the kernel. Every data/figure path below is built as f'{pf}/data/...', so the
# notebook has to be run from the folder that contains the `data` directory.
pf = pathlib.Path().resolve() # Points to parent folder containing notebook and data, eg: 'C:/Users/Carla/Dropbox/Uni/10. Semester/Dynamic Programming/Term paper'

from tqdm import tqdm # for-loop progress bar

# Descriptive

### Create data sets - trading volume - incl. short term

In [None]:
# Sample window; also used as the x-axis limits of the figures further down
start = '1996-01-01'
end = '2021-12-31'

# Raw option quotes
df = pd.read_csv(f'{pf}/data/input/rawdata_01jan1996to31dec2021_extract.csv')  # 28m rows in full data set

# Dividend data (source: http://www.econ.yale.edu/~shiller/data.htm).
# The date column is parsed (day/month/year), used as index, and the series is
# upsampled to daily frequency with a forward fill, so that every calendar day
# carries the most recent observation.
df_snp_dy = (
    pd.read_excel(f'{pf}/data/input/div_shiller.xls')
    .assign(date=lambda raw: pd.to_datetime(raw['date'], format='%d/%m/%Y'))
    .set_index('date')
    .resample('D')
    .ffill()
)

# Treasury rates; the first spreadsheet column becomes the index
df_trate_1mo3mo = pd.read_excel(f'{pf}/data/input/trate_1mo3mo.xlsx', index_col=0)

# S&P 500 index data
df_snp = pd.read_csv(f'{pf}/data/input/SP500rawdata_1995-01-01to2021-12-31_extract.csv')

### Format date columns
# Quote date and expiration date of the options, and the index date, are stored
# as 'YYYY-MM-DD' strings; convert them to datetimes.
df[['date', 'exdate']] = df[['date', 'exdate']].apply(pd.to_datetime, format='%Y-%m-%d')
df_snp['date'] = pd.to_datetime(df_snp['date'], format='%Y-%m-%d')

### Merge with snp and treasury data
# Left joins on the quote date: every option row is kept and receives the index,
# dividend and treasury columns of that day (NaN where a table has no such date).
df = (
    df.merge(df_snp, on='date', how='left')
      .merge(df_snp_dy, on='date', how='left')
      .merge(df_trate_1mo3mo, on='date', how='left')
)

### Calculated variables
# tau: calendar days from quote date to expiration, as an integer
days_to_expiry = df['exdate'] - df['date']
df['tau'] = (days_to_expiry / np.timedelta64(1, 'D')).astype(int)

# price: midpoint of best bid and best offer
df['price'] = 0.5 * (df['best_bid'] + df['best_offer'])

# strike: raw strike_price divided by 1000
df['strike'] = df['strike_price'].div(1000)

# Moving average dividend yield: dividend relative to the snp21ma column
df['dy_ma'] = df['dividend'].div(df['snp21ma'])

# Moneyness: log of the strike, discounted at (tr - dy_ma), relative to the index level.
# NOTE(review): tau is in calendar days but is annualised with 252 here, while
# tau_years further down divides by 365 - confirm which day count is intended.
net_rate = df['tr'] - df['dy_ma']
discount = np.exp(-net_rate * df['tau'] / 252)
df['money'] = np.log(df['strike'] * discount / df['snp'])

### Filters
# Keep only rows with positive traded volume
df = df.loc[df['volume'] > 0]

# Keep rows where the bid OR the offer is at least 0.05, then require a non-negative spread.
# NOTE(review): an OR of the two quotes lets a bid below 0.05 through whenever the
# offer is >= 0.05 - confirm this is intended rather than requiring both.
has_min_quote = (df['best_bid'] >= 0.05) | (df['best_offer'] >= 0.05)
df = df.loc[has_min_quote]
df['spread'] = df['best_offer'] - df['best_bid']
df = df.loc[df['spread'] >= 0]

# Keep maturities of at most 365 days (a lower bound of 9 days is currently disabled)
df = df.loc[df['tau'] <= 365]

# Keep rows with a non-missing, strictly positive implied volatility
iv = df['impl_volatility']
df = df.loc[iv.notna() & (iv > 0)]

# Filter for non standard expiry dates:
# keep expirations before the 27th of the month that fall on dayofweek >= 4
# (pandas counts Monday as 0, so 4 is Friday)
df['exdate_day'] = df['exdate'].dt.day
df['exdate_weekday'] = df['exdate'].dt.dayofweek
df = df.loc[(df['exdate_day'] < 27) & (df['exdate_weekday'] >= 4)]

# Create maturity groups from tau expressed in years (tau / 365).
# The rules are evaluated top-down and the first match wins:
#   tau_years < 8/365  -> "vlow"
#   tau_years < 0.25   -> "low"
#   tau_years > 0.5    -> "high"
#   anything else      -> "med"
df['tau_years'] = df['tau'] / 365
maturity_rules = [
    df['tau_years'] < 8/365,
    df['tau_years'] < 0.25,
    df['tau_years'] > 0.5,
]
df['maturity_group'] = np.select(maturity_rules, ["vlow", "low", "high"], default="med")

# Number of quotes per (date, cp_flag, tau) cell, as a flat table with count column 'n'
count_keys = ['date', 'cp_flag', 'tau']
df_tcount = df.groupby(count_keys).size().reset_index(name='n')

# Attach the cell size to every quote and keep only cells with more than 3 quotes
df = df.merge(df_tcount, on=count_keys, how='left')
df = df.loc[df['n'] > 3]

# Order the panel and index it by quote date
df = df.sort_values(by=['date', 'cp_flag', 'exdate', 'strike']).set_index('date')
df  # presumably row counts from earlier runs: 1542328, 1533666

In [None]:
###### SP500
###### Put and call volume
# Total traded volume per option type (cp_flag) and day
df_vol_cp = df.groupby(['cp_flag', 'date'])['volume'].sum().reset_index()

###### Total volume by tau groups
# Total traded volume per day and maturity group
df_vol_grps = df.groupby(['date', 'maturity_group'])['volume'].sum().reset_index()

###### Min and Max of strike price range for put and calls by date
# Lowest and highest strike per option type and day, computed in one pass and
# then split into two frames that each expose the value in a 'strike' column
strike_range = df.groupby(['cp_flag', 'date'])['strike'].agg(['min', 'max'])
df_range_cp_min = strike_range['min'].rename('strike').reset_index()
df_range_cp_max = strike_range['max'].rename('strike').reset_index()

###### Put call disparity: D = S_t - (C(k,tau) - P(k,tau) + k*exp(-r*tau))
# Flat working copy of the columns needed for the parity check; reset_index turns the
# 'date' index back into a column. (This must read from `df`: `df_` does not exist
# before this line, so referencing it on the right-hand side fails on a fresh kernel.)
df_ = df[['tr', 'snp', 'cp_flag', 'tau', 'strike', 'price', 'money']].reset_index()

# (date, tau, strike) cells holding at least two quotes; the count lands in the 'cp_flag' column.
# A count >= 2 does not by itself guarantee one call plus one put - cells lacking either
# price end up with a NaN disparity and are removed by the dropna further down.
df_intersect = df_.groupby(['date', 'tr', 'snp', 'tau', 'strike'])['cp_flag'].size().reset_index()
df_intersect = df_intersect[df_intersect.cp_flag >= 2]

# Call and put prices as separate frames. Selecting with .loc and renaming produces new
# frames, which avoids assigning a column on a slice of df_ (SettingWithCopyWarning).
pcd_keys = ['date', 'tau', 'strike']
df__call = df_.loc[df_.cp_flag == 'C', pcd_keys + ['price', 'money']].rename(columns={'price': 'Cprice'})
df__put = df_.loc[df_.cp_flag == 'P', pcd_keys + ['price', 'money']].rename(columns={'price': 'Pprice'})

# Join call and put prices onto the intersecting cells.
# Both frames carry 'money', so the merge yields money_x (calls) and money_y (puts).
df_pcd = pd.merge(left=df_intersect, right=df__call, on=pcd_keys, how='left')
df_pcd = pd.merge(left=df_pcd, right=df__put, on=pcd_keys, how='left')

# Moneyness group from the call's point of view
df_pcd['moneyness_grp'] = np.where(df_pcd['money_x'] < -0.10, "ITM", np.where(df_pcd['money_x'] > 0.1, "OTM", 'ATM'))

# Disparity: index level minus (call - put + discounted strike)
df_pcd['tau_years'] = df_pcd['tau'] / 365  # tau in years
df_pcd['pcdisparity'] = df_pcd['snp'] - (df_pcd['Cprice'] - df_pcd['Pprice'] + df_pcd['strike'] * np.exp(-df_pcd['tr'] * df_pcd["tau_years"]))

# Maturity group, same cut-offs as on the main panel (first matching rule wins)
df_pcd['maturity_group'] = np.select(
    [df_pcd['tau_years'] < 8/365, df_pcd['tau_years'] < 0.25, df_pcd['tau_years'] > 0.5],
    ["vlow", "low", "high"],
    default="med",
)
# A list for `subset` is accepted by every pandas version; a bare string is not
df_pcd = df_pcd.dropna(subset=['pcdisparity'])

# Average disparity per day and maturity group
df_pcd_grps = df_pcd.groupby(['date', 'maturity_group'])['pcdisparity'].mean().reset_index()

# Average disparity per day, moneyness group and maturity group
df_pcd_grps_money = df_pcd.groupby(['date', 'moneyness_grp', 'maturity_group'])['pcdisparity'].mean().reset_index()


## Table: Volume of traded options by moneyness and maturity groups

In [None]:
# Select puts and create money groups (negative log-moneyness = OTM for a put).
# .copy() gives an independent frame, so adding a column does not write to a slice of df.
df_puts = df[df.cp_flag == 'P'].copy()
df_puts['moneyness_grp'] = np.where(df_puts['money'] < -0.10, "OTM", np.where(df_puts['money'] > 0.1, "ITM", "ATM"))
# Select calls and create money groups (negative log-moneyness = ITM for a call)
df_calls = df[df.cp_flag == 'C'].copy()
df_calls['moneyness_grp'] = np.where(df_calls['money'] < -0.10, "ITM", np.where(df_calls['money'] > 0.1, "OTM", "ATM"))

###### Mean daily traded volume by moneyness and maturity group
# Step 1: total volume per (moneyness, maturity, day).
# Step 2: mean of those daily totals per (moneyness, maturity); days on which a
#         group has no rows do not enter the mean as zeros.
# Step 3: pivot to a moneyness x maturity table.
# Calls
df_calls_vol_sum = df_calls.groupby(['moneyness_grp', 'maturity_group', 'date'])['volume'].sum().reset_index()
df_calls_vol_mean_grps = df_calls_vol_sum.groupby(['moneyness_grp', 'maturity_group'])['volume'].mean().reset_index()
df_calls_pivot = df_calls_vol_mean_grps.pivot(index="moneyness_grp", columns="maturity_group", values="volume")

# Puts
df_puts_vol_sum = df_puts.groupby(['moneyness_grp', 'maturity_group', 'date'])['volume'].sum().reset_index()
df_puts_vol_mean_grps = df_puts_vol_sum.groupby(['moneyness_grp', 'maturity_group'])['volume'].mean().reset_index()
df_puts_pivot = df_puts_vol_mean_grps.pivot(index="moneyness_grp", columns="maturity_group", values="volume")

# Stack calls on top of puts to create the table for the presentation
# (row labels repeat: the first ATM/ITM/OTM rows are calls, the following ones puts)
df_vol_tab = pd.concat([df_calls_pivot, df_puts_pivot], axis=0)

# Add total column
df_vol_tab['total'] = df_vol_tab['vlow'] + df_vol_tab['low'] + df_vol_tab['med'] + df_vol_tab['high']

# Re-order columns
df_vol_tab = df_vol_tab[['vlow', 'low', 'med', 'high', 'total']]

# Print to latex. The escape mode must be the lowercase 'latex'; any other spelling
# raises a ValueError as soon as a formatted cell contains a string.
print(df_vol_tab.style.format(subset=['vlow', 'low', 'med', 'high', 'total'], precision=0, escape='latex').to_latex())

## Figure: Volume of traded options

In [None]:
plot_snp = False  # if True, plots the S&P 500 on a second y-axis

tau_grps = ['vlow', 'low', 'med', 'high']
labels = ['weeklies', '1w-3m', '3-6m', '6-12m']
colors = ['red', 'blue', 'orange', 'green']

fig, ax = plt.subplots(1, 1, figsize=(10, 6), constrained_layout=True)

for j, grp in enumerate(tau_grps):

    # Select group. Use a dedicated name: rebinding `df` here would replace the main
    # option panel, and the later cells that group `df` by cp_flag would then fail.
    df_grp = df_vol_grps[df_vol_grps['maturity_group'] == grp]

    # X - values
    x = df_grp['date']

    # Y - values: rolling average over 63 rows (about 3 months of trading days)
    y = df_grp['volume'].rolling(63).mean()

    # Plot
    ax.plot(x, y, label=f'tau grp = {grp}', color=colors[j], alpha=0.5)

# Axes, labels, title etc
ax.set_xlim([pd.to_datetime(start, format='%Y-%m-%d'), pd.to_datetime(end, format='%Y-%m-%d')])
ax.set_ylabel(r'Average daily trading volume')
ax.set_title(r'3-month moving average of daily trading volume by maturity groups', loc='center', fontsize='medium')
ax.legend(loc='upper left')

# Plot the S&P 500 on a second y-axis
if plot_snp:
    ax2 = ax.twinx()
    ax2.plot(df_snp['date'], df_snp['snp'], color='black', label='SP500')
    ax2.set_ylabel("S&P 500")
    ax2.legend(loc='upper right')

filepath = Path(f'{pf}/data/figures/descriptive_vol_grps')  # Set name
fig.savefig(filepath)  # bbox_inches='tight'


## Figure: Strike price range

In [None]:
plot_snp = False  # if True, additionally draws the S&P 500 on a second y-axis

cp_flags = ['P', 'C']
colors = ['blue', 'green']

fig, ax = plt.subplots(1, 1, figsize=(10, 6), constrained_layout=True)

# One shaded band per option type, spanning the smoothed minimum and maximum strike
for cp_flag, band_color in zip(cp_flags, colors):

    # Daily min / max strike rows of this option type
    df_min = df_range_cp_min.loc[df_range_cp_min['cp_flag'] == cp_flag]
    df_max = df_range_cp_max.loc[df_range_cp_max['cp_flag'] == cp_flag]

    # Dates on the x-axis, 63-row rolling means of the strike bounds on the y-axis
    x = df_min['date']
    y_min = df_min['strike'].rolling(63).mean()
    y_max = df_max['strike'].rolling(63).mean()

    if cp_flag == 'C':
        label = 'Calls'
    else:
        label = 'Puts'
    ax.fill_between(x, y_min, y_max, label=label, color=band_color, alpha=0.5)

# Index level on the same axis as the strikes
ax.plot(df_snp['date'], df_snp['snp'], label='SP500', color='black')

# Axes, labels, title etc
x_limits = [pd.to_datetime(start, format='%Y-%m-%d'), pd.to_datetime(end, format='%Y-%m-%d')]
ax.set_xlim(x_limits)
ax.set_ylabel(r'Strike')
ax.set_title(r'3 month moving average of daily strike price range for puts and calls', loc='center', fontsize='medium')
ax.legend(loc='upper left')

# Optional second y-axis with the S&P 500
if plot_snp:
    ax2 = ax.twinx()
    ax2.plot(df_snp['date'], df_snp['snp'], color='black', label='SP500')
    ax2.set_ylabel("S&P 500")
    ax2.legend(loc='upper right')

filepath = Path(f'{pf}/data/figures/descriptive_strike_range')  # Set name
plt.savefig(filepath)  # bbox_inches='tight'

## Figure: Evolution of weights

In [None]:
###### Data
def _call_share(long_table, value_col):
    """Pivot a long (date, cp_flag, value) table to one row per date and add the call share.

    The result has the columns 'date', 'C', 'P', 'total' (= C + P) and 'w_c' (= C / total).
    """
    wide = long_table.pivot(index="date", columns="cp_flag", values=value_col)
    wide['total'] = wide['C'] + wide['P']
    wide['w_c'] = wide['C'] / wide['total']
    return wide.reset_index()


# Share of calls in the number of quoted contracts (sum of the cell counts n per type and day)
df_nweights = df_tcount.groupby(['cp_flag', 'date'])['n'].sum().reset_index()
df_nweights_pivot = _call_share(df_nweights, 'n')

# Share of calls in traded volume
df_vol_cp_weights = _call_share(df_vol_cp, 'volume')


In [None]:
tau_grps = ['vlow', 'low', 'med', 'high']
colors = ['red', 'blue', 'orange', 'green']

fig, ax = plt.subplots(1, 1, figsize=(10, 6), constrained_layout=True)

###### Total volume by day, tau group and cp_flag
# Identical for every maturity group, so it is computed once before the loop.
# Requires `df` to still be the filtered option panel (it must contain cp_flag).
df_vol = df.groupby(['date', 'maturity_group', 'cp_flag'])['volume'].sum().reset_index()

for j, grp in enumerate(tau_grps):
    # Rows of this maturity group, pivoted to one row per date with columns C and P
    df_grp = df_vol.loc[df_vol['maturity_group'] == grp, ['date', 'cp_flag', 'volume']]
    df_grp = df_grp.pivot(index="date", columns="cp_flag", values="volume")

    # Volume weight of calls: C / (C + P); NaN on days where one of the two is missing
    df_grp['total'] = df_grp['C'] + df_grp['P']
    df_grp['w_c'] = df_grp['C'] / df_grp['total']
    df_grp = df_grp.reset_index()

    # Sample mean of the daily call weight
    mean = np.mean(df_grp['w_c'])
    print(f'grp={grp}, mean={mean:.3f}')

    # Plot the rolling average over 63 rows (about 3 months of trading days)
    x = df_grp['date']
    y = df_grp['w_c'].rolling(63).mean()
    ax.plot(x, y, label=f'tau grp = {grp}', color=colors[j], alpha=0.5)

# Axes, labels, title etc, so the figure can be read on its own
ax.set_xlim([pd.to_datetime(start, format='%Y-%m-%d'), pd.to_datetime(end, format='%Y-%m-%d')])
ax.set_ylabel(r'Weight on calls')
ax.set_title(r'3-month moving average of volume weight placed on calls relative to puts by maturity groups', loc='center', fontsize='medium')
ax.legend(loc='upper left')


In [None]:
plot_snp = True  # if True, plots the S&P 500 on a second y-axis

fig, ax = plt.subplots(1, 1, figsize=(10, 6), constrained_layout=True)

# Y - values: rolling averages over 63 rows (about 3 months of trading days)
y1 = df_vol_cp_weights['w_c'].rolling(63).mean()
y2 = df_nweights_pivot['w_c'].rolling(63).mean()

# Plot each series against the dates of its OWN frame. The contract counts come from
# df_tcount, which is built before the n > 3 filter, while the volumes are aggregated
# after it, so the two frames are not guaranteed to cover the same dates.
ax.plot(df_vol_cp_weights['date'], y1, label=r'$\omega$ - volume', color='b', alpha=0.5)
ax.plot(df_nweights_pivot['date'], y2, label=r'$\omega$ - contract proportion', color='g', alpha=0.5)

# Full-width dashed lines at the sample means. axhline's xmin/xmax are fractions of the
# axes width (default 0 to 1), not data coordinates, so the defaults already span the plot.
ax.axhline(np.mean(df_vol_cp_weights['w_c']), linestyle='dashed', color='b', alpha=0.75)
ax.axhline(np.mean(df_nweights_pivot['w_c']), linestyle='dashed', color='g', alpha=0.75)

# Axes, labels, title etc
ax.set_xlim([pd.to_datetime(start, format='%Y-%m-%d'), pd.to_datetime(end, format='%Y-%m-%d')])
ax.set_ylim(0.25, 0.5)
ax.set_ylabel(r'Weight on calls')
ax.set_title(r'3-month moving average of weight placed on calls relative to puts for $\tau_{all}$', loc='center', fontsize='medium')
ax.legend(loc='upper left')

# Plot the S&P 500 on a second y-axis
if plot_snp:
    ax2 = ax.twinx()
    ax2.plot(df_snp['date'], df_snp['snp'], color='black', label='SP500')
    ax2.set_ylabel("S&P 500")
    ax2.legend(loc='upper right')

filepath = Path(f'{pf}/data/figures/descriptive_weights')  # Set name
fig.savefig(filepath)  # bbox_inches='tight'


## Put-call disparity

In [None]:
tau_grps = ['vlow', 'low', 'med', 'high']
money_grps = ['ITM', 'ATM', 'OTM']          # call moneyness shown in panel i
money_grps_reverse = ['OTM', 'ATM', 'ITM']  # put moneyness named in the same panel's title
colors = ['red', 'blue', 'orange', 'green']

# One panel per moneyness group, one line per maturity group.
# (No S&P 500 overlay here: `ax` is an array of three axes.)
fig, ax = plt.subplots(1, 3, figsize=(12, 4), constrained_layout=True)
for i, mon_grp in enumerate(money_grps):
    # Rows of this moneyness group; filtered once per panel rather than once per line
    df_money = df_pcd_grps_money[df_pcd_grps_money['moneyness_grp'] == mon_grp]

    for j, grp in enumerate(tau_grps):
        # Select the maturity group. A dedicated name is used so that the global
        # option panel `df` is not overwritten by this plotting loop.
        df_sel = df_money[df_money['maturity_group'] == grp]

        # X - values
        x = df_sel['date']

        # Y - values: rolling average over 63 rows (about 3 months of trading days)
        y = df_sel['pcdisparity'].rolling(63).mean()

        # Plot
        ax[i].plot(x, y, label=f'tau grp = {grp}', color=colors[j])

    # Raw f-string so that \tau reaches mathtext and is rendered as the Greek letter
    ax[i].set_title(rf'$D_t(k,\tau)$, {mon_grp} calls, {money_grps_reverse[i]} puts', loc='center', fontsize='medium')
    ax[i].set_xlim([pd.to_datetime(start, format='%Y-%m-%d'), pd.to_datetime(end, format='%Y-%m-%d')])

# Axes, labels, legend (shown on the first panel only)
ax[0].set_ylabel(r'Put-call disparity $D_{t(k,\tau)}$')
ax[0].legend(loc='upper left')

filepath = Path(f'{pf}/data/figures/descriptive_put_call_disparity')  # Set name
fig.savefig(filepath)  # bbox_inches='tight'


In [None]:
# Mean put-call disparity relative to the mean index level (as a fraction)
df_pcd['pcdisparity'].mean() / df_['snp'].mean()

In [None]:
# Mean put-call disparity relative to the mean index level, in percent.
# Computed from the data; this replaces the hard-coded literal 0.0020208267762782815,
# which appears to have been pasted from the preceding cell's output and would go
# stale whenever the sample or the filters change.
100 * np.mean(df_pcd['pcdisparity']) / np.mean(df_['snp'])

## Count observations

In [None]:
# SP500: reload the unfiltered option quotes for the observation counts below
raw_options_csv = f'{pf}/data/input/rawdata_01jan1996to31dec2021_extract.csv'
df = pd.read_csv(raw_options_csv)  # 28m rows in full data set


In [None]:
# First and last quote date, number of rows (millions) and total volume (millions).
# pandas reductions are used instead of the Python builtins min/max/sum, which would
# iterate over every element of a multi-million-row column one by one.
print(df['date'].min(), df['date'].max(), len(df) / 10**6, df['volume'].sum() / 10**6)

In [None]:
# NOTE(review): hard-coded integer expressed in millions; presumably a total pasted
# from an earlier cell output - TODO confirm its origin or compute it from the data.
4066418005/10**6

In [None]:
# Raw input files of the other underlyings: first and last quote date,
# number of rows (millions) and total volume (millions)
names = ['DJX', 'NDX', 'RUT', 'TSLA', 'AMZN', 'GOOGL']
for name in names:
    # A loop-local frame name keeps the global `df` from silently pointing at the last file read
    df_raw = pd.read_csv(f'{pf}/data/input/rawdata_01jan1996to31dec2021_extract_{name}.csv')
    # pandas reductions instead of the Python builtins min/max/sum (element-wise iteration)
    print(name, df_raw['date'].min(), df_raw['date'].max(), len(df_raw) / 10**6, df_raw['volume'].sum() / 10**6)

In [None]:
# Processed output files of the other underlyings: first and last quote date,
# number of rows (millions) and total volume (millions)
names = ['DJX', 'NDX', 'RUT', 'TSLA', 'AMZN', 'GOOGL']
for name in names:
    # A loop-local frame name keeps the global `df` from silently pointing at the last file read
    df_out = pd.read_csv(f'{pf}/data/output/allopt_01jan1996to31dec2021_extract_{name}.csv')
    # pandas reductions instead of the Python builtins min/max/sum (element-wise iteration)
    print(name, df_out['date'].min(), df_out['date'].max(), len(df_out) / 10**6, df_out['volume'].sum() / 10**6)

In [None]:
# Processed output file without an underlying suffix; elsewhere in this notebook the
# un-suffixed extract is labelled SP500. The label is written out explicitly: the
# variable `name` left over from the preceding loop still holds 'GOOGL' and would
# mislabel this row.
df = pd.read_csv(f'{pf}/data/output/allopt_01jan1996to31dec2021_extract.csv')
print('SP500', df['date'].min(), df['date'].max(), len(df) / 10**6, df['volume'].sum() / 10**6)