# Calculate Limits

## Simulate First

You can just run the whole notebook

### Imports / Helper Functions

In [1]:
import sys
import math
import warnings

import psycopg2
import wrds
import gzip

import seaborn as sns
import os
import quandl
import json
import zipfile
import matplotlib.pyplot as plt
import matplotlib.dates as mdates

import functools
import requests
import io

import urllib.request
from urllib.error import HTTPError
# from html_table_parser.parser import HTMLTableParser
import re

import plotnine as p9
from plotnine import ggplot, scale_x_date, guides, guide_legend, geom_bar, scale_y_continuous, \
    scale_color_identity, geom_line, geom_point, labs, theme_minimal, theme, element_blank, element_text, \
        geom_ribbon, geom_hline, aes, scale_size_manual, scale_color_manual, ggtitle

from datetime import datetime
import datetime

import pandas as pd
# import pandas_market_calendars as mcal
from pandas.plotting import autocorrelation_plot
import numpy as np
from numpy import cumsum, log, polyfit, sqrt, std, subtract
import scipy as sp
from scipy.stats import norm
import scipy.stats as stats

from statsmodels.tsa.stattools import coint
from statsmodels.graphics.tsaplots import plot_acf
import statsmodels.api as sm
from statsmodels.tsa.stattools import acf
from statsmodels.regression.linear_model import OLS
from statsmodels.tools.tools import add_constant

from collections import deque
from bisect import insort, bisect_left
from itertools import islice

pd.set_option('display.max_columns', None)

In [2]:
def max_drawdown(returns):
        local_max = [n for n in range(len(returns)-1) if ((n==0) and (returns[0] > returns[1])) or 
           ((n > 0) and  (returns[n-1]<returns[n]) and (returns[n+1]<returns[n]))] 
        
        local_min = [n for n in range(1,len(returns)) if ((n == len(returns)-1) and (returns[-1] < returns[-2])) or
                (returns[n-1]>returns[n]) and (returns[n+1]>returns[n])]
        
        def next_local_min(n):
            if [m for m in local_min if m > n]:
                return [m for m in local_min if m > n][0]
            else: return None
        
        drawdowns = [(n,next_local_min(n)) for n in local_max]
        drawdown_values = [returns[n] - returns[m] for (n,m) in drawdowns if m != None]
        if drawdown_values:
            return  np.max(drawdown_values)
        else: return 0.0

### Dividends

In [3]:
start_date = '2018-01-01'
end_date = '2023-02-28'
spy_divdata = pd.read_csv('spy_tickerdata.csv')[['date','dividend']].sort_values(by='date').reset_index(drop=True)
spy_divdata = spy_divdata.loc[(spy_divdata['date'] >= start_date) & (spy_divdata['date'] <= end_date)].copy().reset_index(drop=True)
spy_divdata['date'] = pd.to_datetime(spy_divdata['date'])
trading_days = spy_divdata['date']
spy_divdata = spy_divdata.loc[spy_divdata['dividend'] != 0]

# Step 1: Calculate the last day of the following month
spy_divdata['date'] = pd.to_datetime(spy_divdata['date'])  # Ensure 'date' is in datetime format
spy_divdata['end_of_next_month'] = spy_divdata['date'] + pd.offsets.MonthEnd(2)

# Step 2: Adjust to the nearest trading day

# This function finds the last trading day of the month or the nearest previous trading day
def find_pay_date(end_of_month, trading_days):
    if end_of_month in trading_days:
        return end_of_month
    else:
        # Find the nearest previous trading day
        eligible_days = trading_days[trading_days <= end_of_month]
        return eligible_days.max()  # The last trading day before or on the end_of_month

# Apply the function to each row to determine the 'pay_date'
spy_divdata['pay_date'] = spy_divdata['end_of_next_month'].apply(lambda date: find_pay_date(date, trading_days))

# Optionally, you can drop the intermediate 'end_of_next_month' column if it's no longer needed
spy_divdata.drop(columns=['end_of_next_month'], inplace=True)

spy_divdata

Unnamed: 0,date,dividend,pay_date
51,2018-03-16,1.09678,2018-04-30
114,2018-06-15,1.246,2018-07-31
182,2018-09-21,1.323,2018-10-31
245,2018-12-21,1.4354,2019-01-31
301,2019-03-15,1.2331,2019-04-30
369,2019-06-21,1.4316,2019-07-31
432,2019-09-20,1.38362,2019-10-31
496,2019-12-20,1.57,2020-01-31
557,2020-03-20,1.40556,2020-04-30
620,2020-06-19,1.3662,2020-07-31


### Simulations

In [4]:
data = pd.read_csv('combinedata.csv')
options = pd.read_csv('option_df.csv')

data['exdate'] = pd.to_datetime(data['exdate'])
options['exdate'] = pd.to_datetime(options['exdate'])


data['exdate_str'] = data['exdate'].dt.strftime('%Y%m%d')  
data['strikeID'] = data['exdate_str'] + '_' + data['strike_price'].astype(str)  
data.drop(columns=['exdate_str'], inplace=True)  

options['exdate_str'] = options['exdate'].dt.strftime('%Y%m%d')  
options['strikeID'] = options['exdate_str'] + '_' + options['strike_price'].astype(str)  
options.drop(columns=['exdate_str'], inplace=True)  

options['date'] = pd.to_datetime(options['date'])
data['date'] = pd.to_datetime(data['date'])

In [5]:
def create_simulations(options_subset, data, dropna_greeks=False):
    simulations = {}

    for index, row in options_subset.iterrows():
        strikeID = row['exdate'].strftime('%Y%m%d') + '_' + str(row['strike_price'])
        mask = (data['strikeID'] == strikeID) & (data['date'] >= row['date']) & (data['date'] <= row['close_date'])
        temp_df = data[mask].sort_values(by=['date', 'cp_flag'])

        shared_cols = ['date', 'exdate', 'strike_price', 'close', 'strikeID'] # 'expiry_indicator',  'adj_open', 'adj_close', 'adj_volume',
        greeks_cols = ['impl_volatility', 'delta'] # , 'gamma', 'vega', 'theta'
        call_specific_cols = ['cp_flag', 'best_bid', 'best_offer'] + greeks_cols # , 'volume', 'open_interest'
        put_specific_cols = call_specific_cols

        calls = temp_df[temp_df['cp_flag'] == 'C'][shared_cols + call_specific_cols].rename(columns={col: col + '_c' for col in call_specific_cols})
        puts = temp_df[temp_df['cp_flag'] == 'P'][shared_cols + put_specific_cols].rename(columns={col: col + '_p' for col in put_specific_cols})

        merged_df = pd.merge(calls, puts, on=shared_cols, how='outer')

        if dropna_greeks:
            greeks_cols_c = [col + '_c' for col in greeks_cols]
            greeks_cols_p = [col + '_p' for col in greeks_cols]
            merged_df = merged_df.dropna(subset=greeks_cols_c + greeks_cols_p, how='any')

        merged_df['delta_sum'] = merged_df['delta_c'].fillna(0) + merged_df['delta_p'].fillna(0)
        merged_df['shares_held'] = -1 * merged_df['delta_sum']

        merged_df = merged_df.sort_values(by='date')
        merged_df['sharechange'] = merged_df['shares_held'].diff()

        simulations[row['date'].strftime('%Y-%m-%d')] = merged_df

    return simulations

In [6]:
%%time
simulations = create_simulations(options, data, dropna_greeks=True)

CPU times: total: 8min 31s
Wall time: 8min 31s


In [7]:
len(trading_days) - len(simulations) # Adds up to contracts with missing greeks

158

This gets rid of simulations missing trading days:

In [8]:
filtered_simulations = {}

for key, df in simulations.items():
    # Ensure 'date' column is in datetime64 dtype
    df['date'] = pd.to_datetime(df['date'])
    
    # Find the range of trading days for each simulation
    start_date = df['date'].min()
    end_date1 = df['date'].max()
    
    # Generate the expected range of trading days
    expected_trading_days = trading_days[(trading_days >= start_date) & (trading_days <= end_date1)]
    
    # Check if all expected trading days are present in the DataFrame
    actual_trading_days = df['date'].unique()
    actual_trading_days = pd.to_datetime(actual_trading_days)  # Ensure this is in datetime format for comparison
    
    # Use .isin() to check if each item in expected_trading_days is in actual_trading_days, then check if all are True
    if expected_trading_days.isin(actual_trading_days).all():
        # If there are no gaps, add the DataFrame to the filtered_simulations dictionary
        filtered_simulations[key] = df

In [9]:
len(trading_days) - len(filtered_simulations)

325

In [10]:
for key, df in list(filtered_simulations.items())[0:1]: 
    print(f"DataFrame for {key}:")
    print(df.columns)
    display(df)  
    print("\n")

DataFrame for 2018-01-03:
Index(['date', 'exdate', 'strike_price', 'close', 'strikeID', 'cp_flag_c',
       'best_bid_c', 'best_offer_c', 'impl_volatility_c', 'delta_c',
       'cp_flag_p', 'best_bid_p', 'best_offer_p', 'impl_volatility_p',
       'delta_p', 'delta_sum', 'shares_held', 'sharechange'],
      dtype='object')


Unnamed: 0,date,exdate,strike_price,close,strikeID,cp_flag_c,best_bid_c,best_offer_c,impl_volatility_c,delta_c,cp_flag_p,best_bid_p,best_offer_p,impl_volatility_p,delta_p,delta_sum,shares_held,sharechange
0,2018-01-03,2018-02-02,270.0,270.47,20180202_270.0,C,2.65,2.69,0.072398,0.562218,P,1.77,1.8,0.069568,-0.442522,0.119696,-0.119696,
1,2018-01-04,2018-02-02,270.0,271.61,20180202_270.0,C,3.45,3.49,0.077848,0.63269,P,1.43,1.46,0.074706,-0.366922,0.265768,-0.265768,-0.146072
2,2018-01-05,2018-02-02,270.0,273.42,20180202_270.0,C,4.8,4.84,0.08382,0.727658,P,1.02,1.04,0.081783,-0.270119,0.457539,-0.457539,-0.191771
3,2018-01-08,2018-02-02,270.0,273.92,20180202_270.0,C,5.17,5.2,0.089623,0.74924,P,0.81,0.82,0.082307,-0.234696,0.514544,-0.514544,-0.057005
4,2018-01-09,2018-02-02,270.0,274.54,20180202_270.0,C,5.67,5.72,0.093639,0.773171,P,0.83,0.84,0.091492,-0.223283,0.549888,-0.549888,-0.035344
5,2018-01-10,2018-02-02,270.0,274.12,20180202_270.0,C,5.27,5.33,0.092791,0.75907,P,0.85,0.87,0.089866,-0.235707,0.523363,-0.523363,0.026525
6,2018-01-11,2018-02-02,270.0,276.12,20180202_270.0,C,6.76,6.91,0.090857,0.854846,P,0.56,0.58,0.097319,-0.162272,0.692574,-0.692574,-0.169211
7,2018-01-12,2018-02-02,270.0,277.92,20180202_270.0,C,8.45,8.6,0.102281,0.890014,P,0.43,0.44,0.107499,-0.121856,0.768158,-0.768158,-0.075584
8,2018-01-16,2018-02-02,270.0,276.97,20180202_270.0,C,7.82,7.97,0.130638,0.827166,P,0.6,0.61,0.121851,-0.157003,0.670163,-0.670163,0.097995
9,2018-01-17,2018-02-02,270.0,279.61,20180202_270.0,C,10.03,10.25,0.131682,0.904279,P,0.38,0.39,0.135023,-0.101488,0.802791,-0.802791,-0.132628






### PnL and Misc for future calcs

In [11]:
def calculate_realized_PL(df, long_op=True):
    df = df.reset_index(drop=True)
    
    # Vectorized initial operations for stock
    df['stock_pos'] = np.where(long_op, df['shares_held'], -df['shares_held'])
    df = df.drop(columns=['shares_held'])
    df['pos_change'] = np.where(long_op, df['sharechange'], -df['sharechange'])
    df = df.drop(columns=['sharechange'])
    df.loc[0, 'pos_change'] = df.loc[0, 'stock_pos']
    
    df['change_cost_basis'] = df['pos_change'] * df['close']
    df['stock_cost_basis'] = df['change_cost_basis'].cumsum()
    df['daily_stock_value'] = df['stock_pos'] * df['close']
    df['stock_PL'] = df['daily_stock_value'] - df['stock_cost_basis']

    # Initial option value and vectorized daily option value calculation
    df['option_cost_basis'] = df.loc[0, 'best_offer_c'] + df.loc[0, 'best_offer_p'] if long_op else -df.loc[0, 'best_bid_c'] - df.loc[0, 'best_bid_p']
    df['change_cost_basis_op'] = 0.0
    df.loc[0, 'change_cost_basis_op'] = df.loc[0, 'option_cost_basis']
    df['daily_option_value'] = np.where(long_op, df['best_bid_c'] + df['best_bid_p'], -(df['best_offer_c'] + df['best_offer_p']))
    df['option_PL'] = df['daily_option_value']- df['option_cost_basis']

    # Column to track total positions, PL, and cash flow after positions are closed
    df['total_cost_basis'] = df['stock_cost_basis'] + df['option_cost_basis']
    df['total_pos_value'] = df['daily_stock_value'] + df['daily_option_value']
    df['total_PL'] = df['stock_PL'] + df['option_PL']
    df['realized_stock_PL'] = 0.0
    df['realized_option_PL'] = 0.0
    df['realized_PL'] = 0.0

    # Misc
    df['UID'] = df['strikeID'] + '_' + str(df.loc[0, 'date'].date())
    df['to_open'] = 0
    df.loc[0, 'to_open'] = 1
#    df['gross_trades_value'] = abs(df['to_open'] * df['option_cost_basis']) + abs(df['change_cost_basis']) # Need to do this at end

    # Close positions on final day
    final_row_index = len(df) - 1
    df.loc[final_row_index, 'realized_stock_PL'] = df.loc[final_row_index, 'stock_PL'] if final_row_index > 0 else 0
    df.loc[final_row_index, 'realized_option_PL'] = df.loc[final_row_index, 'option_PL'] if final_row_index > 0 else 0
    df.loc[final_row_index, 'realized_PL'] = df.loc[final_row_index, 'total_PL'] if final_row_index > 0 else 0
#    df.loc[final_row_index, 'gross_trades_value'] = abs(df.loc[final_row_index, 'daily_option_value']) + abs(df.loc[final_row_index - 1, 'stock_pos']) * df.loc[final_row_index, 'close'] 

    final_close_price = df.loc[final_row_index, 'close']
    df.loc[final_row_index, 'stock_pos'] = 0
    df.loc[final_row_index, 'pos_change'] = - df.loc[final_row_index - 1, 'stock_pos'] if final_row_index > 0 else 0
    df.loc[final_row_index, 'change_cost_basis'] = df.loc[final_row_index, 'pos_change'] * final_close_price
    df.loc[final_row_index, 'stock_cost_basis'] = 0
    df.loc[final_row_index, 'daily_stock_value'] = 0
    df.loc[final_row_index, 'stock_PL'] = 0

    df.loc[final_row_index, 'option_cost_basis'] = 0
    df.loc[final_row_index, 'change_cost_basis_op'] = -df.loc[final_row_index, 'daily_option_value'] if final_row_index > 0 else 0
    df.loc[final_row_index, 'daily_option_value'] = 0
    df.loc[final_row_index, 'option_PL'] = 0

    df.loc[final_row_index, 'total_cost_basis'] = 0
    df.loc[final_row_index, 'total_pos_value'] = 0
    df.loc[final_row_index, 'total_PL'] = 0
    
    return df

*Cash - total cost basis + net realized PL* gives you how much cash is available to use

In [12]:
%%time
simulations_long = {date: calculate_realized_PL(df.copy(), long_op=True) for date, df in filtered_simulations.items()}
simulations_short = {date: calculate_realized_PL(df.copy(), long_op=False) for date, df in filtered_simulations.items()}

CPU times: total: 11.3 s
Wall time: 11.3 s


In [13]:
for key, df in list(simulations_short.items())[-5:]: 
    print(f"DataFrame for {key}:")
    print(df.columns)
    display(df)  
    print("\n")

DataFrame for 2023-02-22:
Index(['date', 'exdate', 'strike_price', 'close', 'strikeID', 'cp_flag_c',
       'best_bid_c', 'best_offer_c', 'impl_volatility_c', 'delta_c',
       'cp_flag_p', 'best_bid_p', 'best_offer_p', 'impl_volatility_p',
       'delta_p', 'delta_sum', 'stock_pos', 'pos_change', 'change_cost_basis',
       'stock_cost_basis', 'daily_stock_value', 'stock_PL',
       'option_cost_basis', 'change_cost_basis_op', 'daily_option_value',
       'option_PL', 'total_cost_basis', 'total_pos_value', 'total_PL',
       'realized_stock_PL', 'realized_option_PL', 'realized_PL', 'UID',
       'to_open'],
      dtype='object')


Unnamed: 0,date,exdate,strike_price,close,strikeID,cp_flag_c,best_bid_c,best_offer_c,impl_volatility_c,delta_c,cp_flag_p,best_bid_p,best_offer_p,impl_volatility_p,delta_p,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open
0,2023-02-22,2023-03-24,399.0,398.54,20230324_399.0,C,9.08,9.12,0.202075,0.51741,P,9.67,9.71,0.201626,-0.506205,0.011205,0.011205,0.011205,4.465641,4.465641,4.465641,0.0,-18.75,-18.75,-18.83,-0.08,-14.284359,-14.364359,-0.08,0.0,0.0,0.0,20230324_399.0_2023-02-22,1
1,2023-02-23,2023-03-24,399.0,400.66,20230324_399.0,C,9.99,10.03,0.200472,0.555948,P,8.01,8.03,0.190496,-0.46952,0.086428,0.086428,0.075223,30.138847,34.604488,34.628242,0.023755,-18.75,0.0,-18.06,0.69,15.854488,16.568242,0.713755,0.0,0.0,0.0,20230324_399.0_2023-02-22,0
2,2023-02-24,2023-03-24,399.0,396.38,20230324_399.0,C,7.37,7.41,0.195788,0.473339,P,10.18,10.21,0.19288,-0.551125,-0.077786,-0.077786,-0.164214,-65.091145,-30.486657,-30.832815,-0.346157,-18.75,0.0,-17.62,1.13,-49.236657,-48.452815,0.783843,0.0,0.0,0.0,20230324_399.0_2023-02-22,0
3,2023-02-27,2023-03-24,399.0,397.73,20230324_399.0,C,7.54,7.58,0.197377,0.495326,P,9.04,9.07,0.192467,-0.531122,-0.035796,-0.035796,0.04199,16.700683,-13.785975,-14.237143,-0.451168,-18.75,0.0,-16.65,2.1,-32.535975,-30.887143,1.648832,0.0,0.0,0.0,20230324_399.0_2023-02-22,0
4,2023-02-28,2023-03-24,399.0,396.26,20230324_399.0,C,7.03,7.05,0.207028,0.465189,P,8.85,8.87,0.170603,-0.573872,-0.108683,0.0,0.035796,14.184523,0.0,0.0,0.0,0.0,15.92,0.0,0.0,0.0,0.0,0.0,-0.398548,2.83,2.431452,20230324_399.0_2023-02-22,0




DataFrame for 2023-02-23:
Index(['date', 'exdate', 'strike_price', 'close', 'strikeID', 'cp_flag_c',
       'best_bid_c', 'best_offer_c', 'impl_volatility_c', 'delta_c',
       'cp_flag_p', 'best_bid_p', 'best_offer_p', 'impl_volatility_p',
       'delta_p', 'delta_sum', 'stock_pos', 'pos_change', 'change_cost_basis',
       'stock_cost_basis', 'daily_stock_value', 'stock_PL',
       'option_cost_basis', 'change_cost_basis_op', 'daily_option_value',
       'option_PL', 'total_cost_basis', 'total_pos_value', 'total_PL',
       'realized_stock_PL', 'realized_option_PL', 'realized_PL', 'UID',
       'to_open'],
      dtype='object')


Unnamed: 0,date,exdate,strike_price,close,strikeID,cp_flag_c,best_bid_c,best_offer_c,impl_volatility_c,delta_c,cp_flag_p,best_bid_p,best_offer_p,impl_volatility_p,delta_p,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open
0,2023-02-23,2023-03-24,401.0,400.66,20230324_401.0,C,8.79,8.82,0.19672,0.518997,P,8.83,8.86,0.186139,-0.507514,0.011483,0.011483,0.011483,4.600779,4.600779,4.600779,0.0,-17.62,-17.62,-17.68,-0.06,-13.019221,-13.079221,-0.06,0.0,0.0,0.0,20230324_401.0_2023-02-23,1
1,2023-02-24,2023-03-24,401.0,396.38,20230324_401.0,C,6.35,6.39,0.19212,0.433775,P,11.19,11.22,0.188801,-0.590449,-0.156674,-0.156674,-0.168157,-66.654072,-62.053293,-62.10244,-0.049147,-17.62,0.0,-17.61,0.01,-79.673293,-79.71244,-0.039147,0.0,0.0,0.0,20230324_401.0_2023-02-23,0
2,2023-02-27,2023-03-24,401.0,397.73,20230324_401.0,C,6.48,6.51,0.193449,0.453692,P,10.01,10.04,0.188205,-0.57267,-0.118978,-0.118978,0.037696,14.99283,-47.060463,-47.32112,-0.260657,-17.62,0.0,-16.55,1.07,-64.680463,-63.87112,0.809343,0.0,0.0,0.0,20230324_401.0_2023-02-23,0
3,2023-02-28,2023-03-24,401.0,396.26,20230324_401.0,C,5.99,6.01,0.202116,0.424099,P,9.85,9.87,0.164628,-0.623364,-0.199265,0.0,0.118978,47.146222,0.0,0.0,0.0,0.0,15.88,0.0,0.0,0.0,0.0,0.0,-0.085759,1.74,1.654241,20230324_401.0_2023-02-23,0




DataFrame for 2023-02-24:
Index(['date', 'exdate', 'strike_price', 'close', 'strikeID', 'cp_flag_c',
       'best_bid_c', 'best_offer_c', 'impl_volatility_c', 'delta_c',
       'cp_flag_p', 'best_bid_p', 'best_offer_p', 'impl_volatility_p',
       'delta_p', 'delta_sum', 'stock_pos', 'pos_change', 'change_cost_basis',
       'stock_cost_basis', 'daily_stock_value', 'stock_PL',
       'option_cost_basis', 'change_cost_basis_op', 'daily_option_value',
       'option_PL', 'total_cost_basis', 'total_pos_value', 'total_PL',
       'realized_stock_PL', 'realized_option_PL', 'realized_PL', 'UID',
       'to_open'],
      dtype='object')


Unnamed: 0,date,exdate,strike_price,close,strikeID,cp_flag_c,best_bid_c,best_offer_c,impl_volatility_c,delta_c,cp_flag_p,best_bid_p,best_offer_p,impl_volatility_p,delta_p,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open
0,2023-02-24,2023-03-24,396.0,396.38,20230324_396.0,C,9.05,9.08,0.201088,0.531177,P,8.81,8.84,0.19899,-0.494048,0.037129,0.037129,0.037129,14.717193,14.717193,14.717193,0.0,-17.86,-17.86,-17.92,-0.06,-3.142807,-3.202807,-0.06,0.0,0.0,0.0,20230324_396.0_2023-02-24,1
1,2023-02-27,2023-03-24,396.0,397.73,20230324_396.0,C,9.27,9.31,0.202705,0.555327,P,7.73,7.76,0.198705,-0.471244,0.084083,0.084083,0.046954,18.675014,33.392207,33.442332,0.050124,-17.86,0.0,-17.07,0.79,15.532207,16.372332,0.840124,0.0,0.0,0.0,20230324_396.0_2023-02-24,0
2,2023-02-28,2023-03-24,396.0,396.26,20230324_396.0,C,8.74,8.77,0.214299,0.524633,P,7.51,7.53,0.178895,-0.50379,0.020843,0.0,-0.084083,-33.31873,0.0,0.0,0.0,0.0,16.3,0.0,0.0,0.0,0.0,0.0,-0.073478,1.56,1.486522,20230324_396.0_2023-02-24,0




DataFrame for 2023-02-27:
Index(['date', 'exdate', 'strike_price', 'close', 'strikeID', 'cp_flag_c',
       'best_bid_c', 'best_offer_c', 'impl_volatility_c', 'delta_c',
       'cp_flag_p', 'best_bid_p', 'best_offer_p', 'impl_volatility_p',
       'delta_p', 'delta_sum', 'stock_pos', 'pos_change', 'change_cost_basis',
       'stock_cost_basis', 'daily_stock_value', 'stock_PL',
       'option_cost_basis', 'change_cost_basis_op', 'daily_option_value',
       'option_PL', 'total_cost_basis', 'total_pos_value', 'total_PL',
       'realized_stock_PL', 'realized_option_PL', 'realized_PL', 'UID',
       'to_open'],
      dtype='object')


Unnamed: 0,date,exdate,strike_price,close,strikeID,cp_flag_c,best_bid_c,best_offer_c,impl_volatility_c,delta_c,cp_flag_p,best_bid_p,best_offer_p,impl_volatility_p,delta_p,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open
0,2023-02-27,2023-03-31,398.0,397.73,20230331_398.0,C,8.92,8.96,0.194675,0.51141,P,9.23,9.26,0.188579,-0.505251,0.006159,0.006159,0.006159,2.449619,2.449619,2.449619,0.0,-18.15,-18.15,-18.22,-0.07,-15.700381,-15.770381,-0.07,0.0,0.0,0.0,20230331_398.0_2023-02-27,1
1,2023-02-28,2023-03-31,398.0,396.26,20230331_398.0,C,8.39,8.42,0.20288,0.483923,P,9.01,9.03,0.169345,-0.540488,-0.056565,0.0,-0.006159,-2.440565,0.0,0.0,0.0,0.0,17.45,0.0,0.0,0.0,0.0,0.0,-0.009054,0.7,0.690946,20230331_398.0_2023-02-27,0




DataFrame for 2023-02-28:
Index(['date', 'exdate', 'strike_price', 'close', 'strikeID', 'cp_flag_c',
       'best_bid_c', 'best_offer_c', 'impl_volatility_c', 'delta_c',
       'cp_flag_p', 'best_bid_p', 'best_offer_p', 'impl_volatility_p',
       'delta_p', 'delta_sum', 'stock_pos', 'pos_change', 'change_cost_basis',
       'stock_cost_basis', 'daily_stock_value', 'stock_PL',
       'option_cost_basis', 'change_cost_basis_op', 'daily_option_value',
       'option_PL', 'total_cost_basis', 'total_pos_value', 'total_PL',
       'realized_stock_PL', 'realized_option_PL', 'realized_PL', 'UID',
       'to_open'],
      dtype='object')


Unnamed: 0,date,exdate,strike_price,close,strikeID,cp_flag_c,best_bid_c,best_offer_c,impl_volatility_c,delta_c,cp_flag_p,best_bid_p,best_offer_p,impl_volatility_p,delta_p,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open
0,2023-02-28,2023-03-31,396.0,396.26,20230331_396.0,C,9.55,9.58,0.207305,0.519384,P,8.15,8.17,0.174174,-0.498412,0.020972,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20230331_396.0_2023-02-28,1






## Trading Strat / Positions / Limits

### Trading Strat

In [14]:
iv_data = pd.read_csv('iv_calculations.csv')
iv_data['iv'] = iv_data['iv']
iv_data

Unnamed: 0.1,Unnamed: 0,date,iv,BS
0,0,2018-01-02,0.021847,0.068871
1,1,2018-01-03,0.023454,0.072398
2,2,2018-01-04,0.027852,0.071179
3,3,2018-01-05,0.027788,0.074068
4,4,2018-01-08,0.029469,0.072545
...,...,...,...,...
1293,1293,2023-02-22,0.107065,0.202075
1294,1294,2023-02-23,0.111236,0.196720
1295,1295,2023-02-24,0.100687,0.201088
1296,1296,2023-02-27,0.114114,0.194675


In [15]:
temp_data = []

for key, df in filtered_simulations.items():
    temp_data.append({'date': key, 'BS_Call_IV': df.loc[0, 'impl_volatility_c']})

BS_Call_IV = pd.DataFrame(temp_data)

iv_data['date'] = pd.to_datetime(iv_data['date'])
BS_Call_IV['date'] = pd.to_datetime(BS_Call_IV['date'])

IV_compare = pd.merge(BS_Call_IV, iv_data[['date', 'iv']], on='date', how='left')
IV_compare.rename(columns={'iv': 'MF_Call_IV'}, inplace=True)
IV_compare['IV_diff'] = IV_compare['MF_Call_IV'] - IV_compare['BS_Call_IV']

In [16]:
IV_compare

Unnamed: 0,date,BS_Call_IV,MF_Call_IV,IV_diff
0,2018-01-03,0.072398,0.023454,-0.048944
1,2018-01-04,0.071179,0.027852,-0.043327
2,2018-01-10,0.080641,0.030803,-0.049838
3,2018-01-11,0.076890,0.028318,-0.048572
4,2018-01-12,0.079350,0.029221,-0.050129
...,...,...,...,...
968,2023-02-22,0.202075,0.107065,-0.095010
969,2023-02-23,0.196720,0.111236,-0.085484
970,2023-02-24,0.201088,0.100687,-0.100401
971,2023-02-27,0.194675,0.114114,-0.080561


#### Quick Look at IV

Taking a look at the differences in IV:

In [17]:
(IV_compare['IV_diff'] > 0).sum()

124

In [18]:
IV_compare[IV_compare['IV_diff'] > 0]['IV_diff'].quantile(0.5)

0.31982084842669284

In [19]:
IV_compare[IV_compare['IV_diff'] > 0]['IV_diff'].quantile(0.25)

0.16191266523189107

In [20]:
IV_compare[IV_compare['IV_diff'] > 0]['IV_diff'].quantile(0.2)

0.12497667184676832

In [21]:
IV_compare[IV_compare['IV_diff'] < 0]['IV_diff'].quantile(0.75)

-0.0785731074996423

In [22]:
IV_compare[IV_compare['IV_diff'] < 0]['IV_diff'].quantile(0.80)

-0.07246443270542066

In [23]:
IV_compare[IV_compare['IV_diff'] < 0]['IV_diff'].quantile(0.70)

-0.08366817795015688

In [24]:
IV_compare[IV_compare['IV_diff'] < 0]['IV_diff'].quantile(0.50)

-0.11270668550109746

In [36]:
IV_compare['IV_diff'].max()

0.5826495838418329

In [37]:
IV_compare['IV_diff'].min()

-0.6570148924085712

#### Back to Strats

Trading Strategies:

In [25]:
# Long-Short
def trade_strategy_1(x):
    if x > 0.25:
        return 1
    elif x < -0.10:
        return -1
    else:
        return 0

# Long Only
def trade_strategy_2(x):
    if x > 0.35:
        return 1
    else:
        return 0

# Short Only
def trade_strategy_3(x):
    if x < -0.08:
        return -1
    else:
        return 0

In [26]:
strat_dict = {'trade_1': trade_strategy_1, 'trade_2': trade_strategy_2, 'trade_3': trade_strategy_3}

In [27]:
for key, func in strat_dict.items():
    IV_compare[key] = IV_compare['IV_diff'].apply(func)
IV_compare

Unnamed: 0,date,BS_Call_IV,MF_Call_IV,IV_diff,trade_1,trade_2,trade_3
0,2018-01-03,0.072398,0.023454,-0.048944,0,0,0
1,2018-01-04,0.071179,0.027852,-0.043327,0,0,0
2,2018-01-10,0.080641,0.030803,-0.049838,0,0,0
3,2018-01-11,0.076890,0.028318,-0.048572,0,0,0
4,2018-01-12,0.079350,0.029221,-0.050129,0,0,0
...,...,...,...,...,...,...,...
968,2023-02-22,0.202075,0.107065,-0.095010,0,0,-1
969,2023-02-23,0.196720,0.111236,-0.085484,0,0,-1
970,2023-02-24,0.201088,0.100687,-0.100401,-1,0,-1
971,2023-02-27,0.194675,0.114114,-0.080561,0,0,-1


In [28]:
IV_compare['date'] = IV_compare['date'].dt.strftime('%Y-%m-%d')

In [29]:
abs(IV_compare['trade_2']).sum()

53

In [30]:
simulations_long[IV_compare['date'].iloc[0]]

Unnamed: 0,date,exdate,strike_price,close,strikeID,cp_flag_c,best_bid_c,best_offer_c,impl_volatility_c,delta_c,cp_flag_p,best_bid_p,best_offer_p,impl_volatility_p,delta_p,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open
0,2018-01-03,2018-02-02,270.0,270.47,20180202_270.0,C,2.65,2.69,0.072398,0.562218,P,1.77,1.8,0.069568,-0.442522,0.119696,-0.119696,-0.119696,-32.374177,-32.374177,-32.374177,0.0,4.49,4.49,4.42,-0.07,-27.884177,-27.954177,-0.07,0.0,0.0,0.0,20180202_270.0_2018-01-03,1
1,2018-01-04,2018-02-02,270.0,271.61,20180202_270.0,C,3.45,3.49,0.077848,0.63269,P,1.43,1.46,0.074706,-0.366922,0.265768,-0.265768,-0.146072,-39.674616,-72.048793,-72.185246,-0.136453,4.49,0.0,4.88,0.39,-67.558793,-67.305246,0.253547,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
2,2018-01-05,2018-02-02,270.0,273.42,20180202_270.0,C,4.8,4.84,0.08382,0.727658,P,1.02,1.04,0.081783,-0.270119,0.457539,-0.457539,-0.191771,-52.434027,-124.48282,-125.100313,-0.617494,4.49,0.0,5.82,1.33,-119.99282,-119.280313,0.712506,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
3,2018-01-08,2018-02-02,270.0,273.92,20180202_270.0,C,5.17,5.2,0.089623,0.74924,P,0.81,0.82,0.082307,-0.234696,0.514544,-0.514544,-0.057005,-15.61481,-140.097629,-140.943892,-0.846263,4.49,0.0,5.98,1.49,-135.607629,-134.963892,0.643737,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
4,2018-01-09,2018-02-02,270.0,274.54,20180202_270.0,C,5.67,5.72,0.093639,0.773171,P,0.83,0.84,0.091492,-0.223283,0.549888,-0.549888,-0.035344,-9.703342,-149.800971,-150.966252,-1.16528,4.49,0.0,6.5,2.01,-145.310971,-144.466252,0.84472,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
5,2018-01-10,2018-02-02,270.0,274.12,20180202_270.0,C,5.27,5.33,0.092791,0.75907,P,0.85,0.87,0.089866,-0.235707,0.523363,-0.523363,0.026525,7.271033,-142.529938,-143.464266,-0.934327,4.49,0.0,6.12,1.63,-138.039938,-137.344266,0.695673,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
6,2018-01-11,2018-02-02,270.0,276.12,20180202_270.0,C,6.76,6.91,0.090857,0.854846,P,0.56,0.58,0.097319,-0.162272,0.692574,-0.692574,-0.169211,-46.722541,-189.25248,-191.233533,-1.981053,4.49,0.0,7.32,2.83,-184.76248,-183.913533,0.848947,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
7,2018-01-12,2018-02-02,270.0,277.92,20180202_270.0,C,8.45,8.6,0.102281,0.890014,P,0.43,0.44,0.107499,-0.121856,0.768158,-0.768158,-0.075584,-21.006305,-210.258785,-213.486471,-3.227687,4.49,0.0,8.88,4.39,-205.768785,-204.606471,1.162313,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
8,2018-01-16,2018-02-02,270.0,276.97,20180202_270.0,C,7.82,7.97,0.130638,0.827166,P,0.6,0.61,0.121851,-0.157003,0.670163,-0.670163,0.097995,27.141675,-183.11711,-185.615046,-2.497936,4.49,0.0,8.42,3.93,-178.62711,-177.195046,1.432064,0.0,0.0,0.0,20180202_270.0_2018-01-03,0
9,2018-01-17,2018-02-02,270.0,279.61,20180202_270.0,C,10.03,10.25,0.131682,0.904279,P,0.38,0.39,0.135023,-0.101488,0.802791,-0.802791,-0.132628,-37.084115,-220.201225,-224.468392,-4.267167,4.49,0.0,10.41,5.92,-215.711225,-214.058392,1.652833,0.0,0.0,0.0,20180202_270.0_2018-01-03,0


In [31]:
def generate_trades_dfs(strat_dict = strat_dict, initial_df = IV_compare):
    
    trades_dfs = {}
    
    for key in strat_dict.keys():

        dfs_to_combine = []
        
        for index, row in initial_df.iterrows():
            date = row['date']
            trade = row[key]
            iv_diff = row['IV_diff']
            
            if trade == 1 and date in simulations_long:
                df_to_add = simulations_long[date].copy()
            elif trade == -1 and date in simulations_short:
                df_to_add = simulations_short[date].copy()
            else:
                # Skip if 'trade' is 0 or the date is not in the dictionaries
                continue
            
            # Add 'trade' & 'IV_diff' column
            df_to_add['IV_diff'] = iv_diff # Needed for position calculation 
            df_to_add[key] = trade  # Include the 'trade' value
            dfs_to_combine.append(df_to_add)
        
        # Concatenate all collected DataFrames
        trades_dfs[key] = pd.concat(dfs_to_combine, ignore_index=True)
        trades_dfs[key] = trades_dfs[key].sort_values(by=['date', 'exdate', 'strike_price', 'to_open']).reset_index(drop=True)

    return trades_dfs

In [32]:
%%time
trades_dfs = generate_trades_dfs()

CPU times: total: 672 ms
Wall time: 669 ms


In [33]:
options['UID'] = options['strikeID'] + '_' + options['date'].dt.date.astype(str)
volumes = options[['date','volume_c','volume_p','adj_volume','UID']].copy()
volumes['date'] = volumes['date'].dt.strftime('%Y-%m-%d')
volumes['volume_med'] = (volumes['volume_c'] + volumes['volume_p']) / 2

Looking at volumes:

In [34]:
volumes.loc[volumes['volume_med'] > 0, 'volume_med'].quantile(0.10)

49.0

In [35]:
(volumes['volume_c'].quantile(.1) + volumes['volume_p'].quantile(.1))/2

28.5

Due to low liquidity on some days in the month-out ATM options, we instead assume that the market can handle options volume at around the 10th percentile of typical volume.

In [38]:
# 10 million dollars: 1 million initial capital, up to 9 million as a lending facility
KAPITAL = 1e7
INITIAL = 1e6
LEVERAGE = KAPITAL - INITIAL

In [64]:
def pos_size(IV_diff, strike_price, option_cost_basis, UID, key):
    
    # Assume we can get filled a percentage of the volume; set minimum to about 10th percentile of non-zero volume days (reasonable entry/exit liquidity assumption)
    volume = min(volumes.loc[volumes['UID'] == UID, 'volume_med'].item(), 50)

    # Calculating position size based on attractiveness while ensuring risk stays within limits and capital remains bounded
    factor = min(min(abs(IV_diff), 0.8) * volume * strike_price / 10, KAPITAL / 10)

    if option_cost_basis == 0:
        # Get the first matching 'option_cost_basis' safely
        filtered_df = trades_dfs[key].loc[trades_dfs[key]['UID'] == UID, 'option_cost_basis']

        option_cost_basis = filtered_df.iloc[0] 
    # Requires a whole number of options contracts
    posSize = round(factor / abs(option_cost_basis)) if option_cost_basis != 0 else 0
    
    return posSize
    
    # kept it simple instead of my earlier complex one

In [65]:
for key, df in trades_dfs.items():
    trades_dfs[key] = df.drop(columns=[col for col in df.columns if col.endswith('_p') or col.endswith('_c')]).copy()

In [66]:
%%time

for key, df in trades_dfs.items():

    df['pos_size'] = df.apply(lambda row: pos_size(row['IV_diff'], row['strike_price'], row['option_cost_basis'], row['UID'], key), axis=1)
    lot_size = 100 * df['pos_size']
    
    for col in ['stock_pos',	'pos_change',	'change_cost_basis',	'stock_cost_basis',	'daily_stock_value',	'stock_PL',	'option_cost_basis',
                'change_cost_basis_op', 'daily_option_value',	'option_PL',	'total_cost_basis',	'total_pos_value',	'total_PL',	'realized_stock_PL',
                'realized_option_PL',	'realized_PL']:
        df['sized_' + col] = lot_size * df[col]

CPU times: total: 4.25 s
Wall time: 4.28 s


In [67]:
trades_dfs['trade_1'].tail(6)

Unnamed: 0,date,exdate,strike_price,close,strikeID,delta_sum,stock_pos,pos_change,change_cost_basis,stock_cost_basis,daily_stock_value,stock_PL,option_cost_basis,change_cost_basis_op,daily_option_value,option_PL,total_cost_basis,total_pos_value,total_PL,realized_stock_PL,realized_option_PL,realized_PL,UID,to_open,IV_diff,trade_1,pos_size,sized_stock_pos,sized_pos_change,sized_change_cost_basis,sized_stock_cost_basis,sized_daily_stock_value,sized_stock_PL,sized_option_cost_basis,sized_change_cost_basis_op,sized_daily_option_value,sized_option_PL,sized_total_cost_basis,sized_total_pos_value,sized_total_PL,sized_realized_stock_PL,sized_realized_option_PL,sized_realized_PL
8730,2023-02-24,2023-03-24,399.0,396.38,20230324_399.0,-0.077786,-0.077786,-0.164214,-65.091145,-30.468441,-30.832815,-0.364373,-19.71,0.0,-17.62,2.09,-50.178441,-48.452815,1.725627,0.0,0.0,0.0,20230324_399.0_2023-02-21,0,-0.103989,-1,11,-85.5646,-180.6354,-71600.259852,-33515.285584,-33916.096148,-400.810564,-21681.0,0.0,-19382.0,2299.0,-55196.285584,-53298.096148,1898.189436,0.0,0.0,0.0
8731,2023-02-27,2023-03-10,410.0,397.73,20230310_410.0,-0.68832,0.0,0.688026,273.648581,0.0,0.0,0.0,0.0,14.0,0.0,0.0,0.0,0.0,0.0,-1.482403,3.12,1.637597,20230310_410.0_2023-02-06,0,-0.12561,-1,15,0.0,1032.039,410472.87147,0.0,0.0,0.0,0.0,21000.0,0.0,0.0,0.0,0.0,0.0,-2223.604995,4680.0,2456.395005
8732,2023-02-27,2023-03-24,396.0,397.73,20230324_396.0,0.084083,0.084083,0.046954,18.675014,33.392207,33.442332,0.050124,-17.86,0.0,-17.07,0.79,15.532207,16.372332,0.840124,0.0,0.0,0.0,20230324_396.0_2023-02-24,0,-0.100401,-1,11,92.4913,51.6494,20542.515862,36731.428184,36786.564749,55.136565,-19646.0,0.0,-18777.0,869.0,17085.428184,18009.564749,924.136565,0.0,0.0,0.0
8733,2023-02-27,2023-03-24,399.0,397.73,20230324_399.0,-0.035796,-0.035796,0.04199,16.700683,-13.767759,-14.237143,-0.469384,-19.71,0.0,-16.65,3.06,-33.477759,-30.887143,2.590616,0.0,0.0,0.0,20230324_399.0_2023-02-21,0,-0.103989,-1,11,-39.3756,46.189,18370.75097,-15144.534614,-15660.857388,-516.322774,-21681.0,0.0,-18315.0,3366.0,-36825.534614,-33975.857388,2849.677226,0.0,0.0,0.0
8734,2023-02-28,2023-03-24,396.0,396.26,20230324_396.0,0.020843,0.0,-0.084083,-33.31873,0.0,0.0,0.0,0.0,16.3,0.0,0.0,0.0,0.0,0.0,-0.073478,1.56,1.486522,20230324_396.0_2023-02-24,0,-0.100401,-1,11,0.0,-92.4913,-36650.602538,0.0,0.0,0.0,0.0,17930.0,0.0,0.0,0.0,0.0,0.0,-80.825646,1716.0,1635.174354
8735,2023-02-28,2023-03-24,399.0,396.26,20230324_399.0,-0.108683,0.0,0.035796,14.184523,0.0,0.0,0.0,0.0,15.92,0.0,0.0,0.0,0.0,0.0,-0.416764,3.79,3.373236,20230324_399.0_2023-02-21,0,-0.103989,-1,11,0.0,39.3756,15602.975256,0.0,0.0,0.0,0.0,17512.0,0.0,0.0,0.0,0.0,0.0,-458.440642,4169.0,3710.559358


### PL Positions

#### Prep

In [68]:
PL_temp_dfs = {}  
for key, df in trades_dfs.items():
    # Group by 'dates' and sum the columns of interest
    columns_to_sum = [] 
    for col in ['stock_pos', 'change_cost_basis',	'stock_cost_basis',	'daily_stock_value',	'stock_PL',	'option_cost_basis', 'change_cost_basis_op',
                'daily_option_value',	'option_PL',	'total_cost_basis',	'total_pos_value',	'total_PL',	'realized_stock_PL',
                'realized_option_PL',	'realized_PL']:
        columns_to_sum.append('sized_' + col)
    
    
    grouped_df = df[['date'] + columns_to_sum].groupby('date').sum().reset_index()
    
    # Reindex the resulting DataFrame to match 'trading_days', filling missing values with 0 or other appropriate value
    pl_df = grouped_df.set_index('date').reindex(trading_days).fillna(0).reset_index()
    pl_df.rename(columns={'index': 'date'}, inplace=True)
    
    # Assign the processed DataFrame to PL_dfs with the same key
    PL_temp_dfs[key] = pl_df
    
PL_temp_dfs['trade_1'][50:84]

Unnamed: 0,date,sized_stock_pos,sized_change_cost_basis,sized_stock_cost_basis,sized_daily_stock_value,sized_stock_PL,sized_option_cost_basis,sized_change_cost_basis_op,sized_daily_option_value,sized_option_PL,sized_total_cost_basis,sized_total_pos_value,sized_total_PL,sized_realized_stock_PL,sized_realized_option_PL,sized_realized_PL
50,2018-03-15,-4905.3341,-199567.8,-1378472.0,-1348967.0,29504.771606,157363.0,11052.0,104763.0,-52600.0,-1221109.0,-1244204.0,-23095.228394,-5556.721428,5130.0,-426.721428
51,2018-03-16,-5069.7366,-45079.17,-1423551.0,-1390122.0,33429.038886,157363.0,0.0,100406.0,-56957.0,-1266188.0,-1289716.0,-23527.961114,0.0,0.0,0.0
52,2018-03-19,2211.3897,1969472.0,545921.0,598158.8,52237.761672,157363.0,0.0,98938.0,-58425.0,703284.0,697096.8,-6187.238328,0.0,0.0,0.0
53,2018-03-20,-984.7594,-865996.6,-299271.0,-266820.6,32450.461897,102036.0,-30927.0,61341.0,-40695.0,-197235.0,-205479.6,-8244.538103,20804.539037,-24400.0,-3595.460963
54,2018-03-21,-1481.19,-134249.7,-412780.9,-400558.2,12222.7261,48700.0,-26963.0,29150.0,-19550.0,-364080.9,-371408.2,-7327.2739,20739.810685,-26373.0,-5633.189315
55,2018-03-22,27.8096,397877.9,7332.557,7332.557,0.0,-14053.0,-46353.0,-14261.0,-208.0,-6720.443,-6928.443,-208.0,22235.5705,-16400.0,5835.5705
56,2018-03-23,-497.3449,-135516.1,-128183.6,-128339.9,-156.289952,-29120.0,-15067.0,-30420.0,-1300.0,-157303.6,-158759.9,-1456.289952,0.0,0.0,0.0
57,2018-03-26,662.8197,307571.2,179387.7,175720.1,-3667.544946,-48481.0,-19361.0,-48179.0,302.0,130906.7,127541.1,-3365.544946,0.0,0.0,0.0
58,2018-03-27,-437.0603,-286628.7,-107241.1,-113897.9,-6656.861793,-69343.0,-20862.0,-69539.0,-196.0,-176584.1,-183436.9,-6852.861793,0.0,0.0,0.0
59,2018-03-28,-684.9435,-64407.49,-171648.5,-177968.9,-6320.325362,-88638.0,-19295.0,-89346.0,-708.0,-260286.5,-267314.9,-7028.325362,0.0,0.0,0.0


In [69]:
divvies = {}

for key, df in PL_temp_dfs.items():
    # Ensure 'date' in both DataFrames is in datetime format
    df['date'] = pd.to_datetime(df['date'])
    spy_divdata['date'] = pd.to_datetime(spy_divdata['date'])
    spy_divdata['pay_date'] = pd.to_datetime(spy_divdata['pay_date'])
    
    # First Merge: Merge to align 'pos_size' with 'spy_divdata' based on 'date'
    # Assuming 'pos_size' exists in your df
    temp_merged = pd.merge(spy_divdata, df[['date', 'sized_stock_pos']], how='left', on='date')

    # Correctly multiply 'signed_pos' by 'dividend' column
    temp_merged['div'] = temp_merged['sized_stock_pos'] * temp_merged['dividend']
    
    # Update the DataFrame in divvies with the final merged DataFrame
    divvies[key] = temp_merged

divvies['trade_1'].head(50)

Unnamed: 0,date,dividend,pay_date,sized_stock_pos,div
0,2018-03-16,1.09678,2018-04-30,-5069.7366,-5560.385708
1,2018-06-15,1.246,2018-07-31,0.0,0.0
2,2018-09-21,1.323,2018-10-31,-1631.3276,-2158.246415
3,2018-12-21,1.4354,2019-01-31,-1342.4627,-1926.97096
4,2019-03-15,1.2331,2019-04-30,-3734.6172,-4605.156469
5,2019-06-21,1.4316,2019-07-31,659.6096,944.297103
6,2019-09-20,1.38362,2019-10-31,0.0,0.0
7,2019-12-20,1.57,2020-01-31,-1650.5574,-2591.375118
8,2020-03-20,1.40556,2020-04-30,-1472.6726,-2069.9297
9,2020-06-19,1.3662,2020-07-31,822.8804,1124.219202


In [70]:
for key, pl_df in PL_temp_dfs.items():
    
    # Get the corresponding DataFrame from divvies
    div_df = divvies[key]
    pl_df['date'] = pd.to_datetime(pl_df['date'])
    # Perform the left merge
    # Note: Ensure 'date' in PL_temp_dfs and 'pay_date' in divvies are of compatible types (preferably datetime)
    merged_df = pd.merge(pl_df, div_df[['pay_date', 'div']], how='left', left_on='date', right_on='pay_date')
    
    # Optionally, you can drop the 'pay_date' column after the merge if it's no longer needed
    merged_df.drop(columns=['pay_date'], inplace=True)
    
    # Assuming 'div' can be NaN, fill NaNs with 0 for accurate calculations
    merged_df['div'] = merged_df['div'].fillna(0)
    
    # Update 'sized_realized_stock_PL' and 'sized_realized_PL' by adding 'div'
    merged_df['sized_realized_stock_PL'] = merged_df['sized_realized_stock_PL'] + merged_df['div']
    merged_df['sized_realized_PL'] = merged_df['sized_realized_PL'] + merged_df['div']
    
    # Update the PL_temp_dfs dictionary with the merged and updated result
    PL_temp_dfs[key] = merged_df

In [71]:
PL_temp_dfs['trade_1'][-2:]

Unnamed: 0,date,sized_stock_pos,sized_change_cost_basis,sized_stock_cost_basis,sized_daily_stock_value,sized_stock_PL,sized_option_cost_basis,sized_change_cost_basis_op,sized_daily_option_value,sized_option_PL,sized_total_cost_basis,sized_total_pos_value,sized_total_PL,sized_realized_stock_PL,sized_realized_option_PL,sized_realized_PL,div
1296,2023-02-27,53.1157,449386.138302,21586.89357,21125.707361,-461.186209,-41327.0,21000.0,-37092.0,4235.0,-19740.10643,-15966.292639,3773.813791,-2223.604995,4680.0,2456.395005,0.0
1297,2023-02-28,0.0,-21047.627282,0.0,0.0,0.0,0.0,35442.0,0.0,0.0,0.0,0.0,0.0,-539.266288,5885.0,5345.733712,0.0


In [72]:
start_date = '2018-01-01'
end_date = '2023-02-28'

tbills_data = pd.read_csv('tbill_data.csv')[['CALDT','TDDURATN','TMATDT','TDNOMPRC']].sort_values(by=['CALDT','TDDURATN']).reset_index(drop=True)
tbills_data = tbills_data.rename(columns = {
    'TMATDT': 'maturity_date',
    'CALDT': 'date',
    'TDNOMPRC': 'price',
    'TDDURATN': 'dte'
})
tbills_data['maturity_date'] = pd.to_datetime(tbills_data['maturity_date'])
tbills_data['date'] = pd.to_datetime(tbills_data['date'])
tbills_data = tbills_data.loc[(tbills_data['date'] >= start_date) & (tbills_data['date'] <= end_date)].copy().reset_index(drop=True)
tbills_data = tbills_data.drop_duplicates(subset='date', keep='first').reset_index(drop=True)
tbills_data = tbills_data[tbills_data['date'].isin(trading_days)].copy().reset_index(drop=True)
tbills_data['rate'] = (100 / tbills_data['price']) ** (1 / tbills_data['dte']) - 1
tbills_data['leverage_rate'] = ((tbills_data['rate'] + 1) ** 365 + 25/100/100) ** (1/365) - 1 # 25 bps to loan anything (leverage)

We use smallest DTE to approximate an overnight risk-free rate.

In [73]:
tbills_data

Unnamed: 0,date,dte,maturity_date,price,rate,leverage_rate
0,2018-01-02,2.0,2018-01-04,99.994083,0.000030,0.000036
1,2018-01-03,1.0,2018-01-04,99.996944,0.000031,0.000037
2,2018-01-04,7.0,2018-01-11,99.976375,0.000034,0.000041
3,2018-01-05,6.0,2018-01-11,99.979583,0.000034,0.000041
4,2018-01-08,3.0,2018-01-11,99.989750,0.000034,0.000041
...,...,...,...,...,...,...
1283,2023-02-22,1.0,2023-02-23,99.987410,0.000126,0.000132
1284,2023-02-23,5.0,2023-02-28,99.935972,0.000128,0.000135
1285,2023-02-24,4.0,2023-02-28,99.950389,0.000124,0.000131
1286,2023-02-27,1.0,2023-02-28,99.987639,0.000124,0.000130


In [74]:
trading_days_df = pd.DataFrame({'date': trading_days})

trading_days_df['date'] = pd.to_datetime(trading_days_df['date'])
tbills_data['date'] = pd.to_datetime(tbills_data['date'])

rfr = trading_days_df.merge(tbills_data[['date', 'rate', 'leverage_rate']], on='date', how='left', sort=True)

rfr['rate'] = rfr['rate'].ffill()
rfr['leverage_rate'] = rfr['leverage_rate'].ffill()

rfr

Unnamed: 0,date,rate,leverage_rate
0,2018-01-02,0.000030,0.000036
1,2018-01-03,0.000031,0.000037
2,2018-01-04,0.000034,0.000041
3,2018-01-05,0.000034,0.000041
4,2018-01-08,0.000034,0.000041
...,...,...,...
1293,2023-02-22,0.000126,0.000132
1294,2023-02-23,0.000128,0.000135
1295,2023-02-24,0.000124,0.000131
1296,2023-02-27,0.000124,0.000130


#### Final Dataframes

When we are short shares, we do not need to borrow money, hence we don't go above initial capital of 1 million, which is why lever cash is 0. We pay to borrow the shares under short fees.

In [75]:
%%time

PL_dfs = {}  # New dictionary to store the processed DataFrames

for key, df in PL_temp_dfs.items():
    # Create a new DataFrame for PL_dfs
    pl_df = pd.DataFrame(index=df.index)
    
    # Assuming 'date' is the column you want to keep as is
    pl_df['date'] = df['date']
    
    # Sum 'sized_change_cost_basis' and 'sized_change_cost_basis_op' then calculate 1 bps as trading costs
    pl_df['gross_stock_trades'] = abs(df['sized_change_cost_basis'])
    pl_df['gross_option_trades'] = abs(df['sized_change_cost_basis_op'])
    pl_df['gross_trades_value'] = pl_df['gross_stock_trades'] + pl_df['gross_option_trades']
    pl_df['stock_trading_costs'] = 1/100/100 * pl_df['gross_stock_trades']
    pl_df['option_trading_costs'] = 1/100/100 * pl_df['gross_option_trades']
    pl_df['net_trading_costs'] = 1/100/100 * pl_df['gross_trades_value']

    # Positions
    pl_df['stock_pos_value']  = df['sized_daily_stock_value']
    pl_df['option_pos_value'] = df['sized_daily_option_value']
    pl_df['gross_pos_value'] = pl_df['stock_pos_value'] + pl_df['option_pos_value']

    # Calculate cumulative sum of PL after including trading costs
    real_stock_PL = df['sized_realized_stock_PL'] - pl_df['stock_trading_costs']
    real_option_PL = df['sized_realized_option_PL'] - pl_df['option_trading_costs']
    real_net_PL = df['sized_realized_PL'] - pl_df['net_trading_costs']
    pl_df['stock_PL'] = real_stock_PL.cumsum()
    pl_df['option_PL'] = real_option_PL.cumsum()
    pl_df['net_PL'] = real_net_PL.cumsum()

    # Iterate over rows to determine interest paid and interest earned
    pl_df['start_cash'] = 0.0
    pl_df['initial_kapital'] = INITIAL
    pl_df['short_fee'] = 0.0
    pl_df['initial_cash'] = 0.0
    pl_df['interest'] = 0.0
    pl_df['lever_cash'] = 0.0
    pl_df['leverage_fee'] = 0.0
    pl_df['end_kapital'] = 0.0
    pl_df.loc[0, 'start_cash'] = INITIAL

    # Short fees, leverage fees, interest income; note that only short shares requires fees, options don't require fees (sell to open)
    for i in range(0, len(pl_df)):
        if i > 0:
            pl_df.loc[i, 'start_cash'] = pl_df.loc[i - 1, 'end_kapital']
        pl_df.loc[i, 'short_fee'] = - min(0.0, df.loc[i, 'sized_daily_stock_value']) * rfr.loc[i, 'leverage_rate']
        pl_df.loc[i, 'initial_kapital'] = pl_df.loc[i, 'start_cash'] + real_net_PL[i] - pl_df.loc[i, 'short_fee']
        pl_df.loc[i, 'initial_cash'] = max(pl_df.loc[i, 'initial_kapital'] - df.loc[i, 'sized_total_cost_basis'], 0.0)
        pl_df.loc[i, 'interest'] = pl_df.loc[i, 'initial_cash'] * rfr.loc[i, 'rate']
        pl_df.loc[i, 'lever_cash'] = max(df.loc[i, 'sized_total_cost_basis'] - pl_df.loc[i, 'initial_kapital'], 0.0)
        pl_df.loc[i, 'leverage_fee'] = pl_df.loc[i, 'lever_cash'] * rfr.loc[i, 'leverage_rate']
        pl_df.loc[i, 'end_kapital'] = pl_df.loc[i, 'initial_kapital'] + pl_df.loc[i, 'interest'] - pl_df.loc[i, 'leverage_fee']

    # Net fees/interest
    pl_df['net_short_fees'] = pl_df['short_fee'].cumsum()
    pl_df['net_interest_paid'] = pl_df['interest'].cumsum()
    pl_df['net_interest_earned'] = pl_df['leverage_fee'].cumsum()

    # Value of all positions
    pl_df['net_pos_value'] = pl_df['end_kapital'] - df['sized_total_cost_basis'] + pl_df['gross_pos_value']
    pl_df['tot_cash'] = KAPITAL - df['sized_total_cost_basis'] + pl_df['net_PL'] - pl_df['net_interest_paid'] + pl_df['net_interest_earned']

    
    # Assign the processed DataFrame to the new dictionary
    PL_dfs[key] = pl_df

CPU times: total: 3.72 s
Wall time: 3.71 s


In [76]:
pl_df

Unnamed: 0,date,gross_stock_trades,gross_option_trades,gross_trades_value,stock_trading_costs,option_trading_costs,net_trading_costs,stock_pos_value,option_pos_value,gross_pos_value,stock_PL,option_PL,net_PL,start_cash,initial_kapital,short_fee,initial_cash,interest,lever_cash,leverage_fee,end_kapital,net_short_fees,net_interest_paid,net_interest_earned,net_pos_value,tot_cash
0,2018-01-02,0.000000,0.0,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000e+00,0.000000e+00,0.000000,1.000000e+06,1.000000e+06,-0.000000,1.000000e+06,29.584646,0.0,0.0,1.000030e+06,-0.000000,29.584646,0.000000,1.000030e+06,9.999970e+06
1,2018-01-03,0.000000,0.0,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000e+00,0.000000e+00,0.000000,1.000030e+06,1.000030e+06,-0.000000,1.000030e+06,30.557393,0.0,0.0,1.000060e+06,-0.000000,60.142039,0.000000,1.000060e+06,9.999940e+06
2,2018-01-04,0.000000,0.0,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000e+00,0.000000e+00,0.000000,1.000060e+06,1.000060e+06,-0.000000,1.000060e+06,33.756587,0.0,0.0,1.000094e+06,-0.000000,93.898626,0.000000,1.000094e+06,9.999906e+06
3,2018-01-05,0.000000,0.0,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000e+00,0.000000e+00,0.000000,1.000094e+06,1.000094e+06,-0.000000,1.000094e+06,34.035027,0.0,0.0,1.000128e+06,-0.000000,127.933653,0.000000,1.000128e+06,9.999872e+06
4,2018-01-08,0.000000,0.0,0.000000,0.000000,0.0000,0.000000,0.000000,0.0,0.000000,0.000000e+00,0.000000e+00,0.000000,1.000128e+06,1.000128e+06,-0.000000,1.000128e+06,34.173373,0.0,0.0,1.000162e+06,-0.000000,162.107026,0.000000,1.000162e+06,9.999838e+06
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1293,2023-02-22,368794.369290,1605.0,370399.369290,36.879437,0.1605,37.039937,-310943.697780,-62028.0,-372971.697780,-1.704122e+06,2.430654e+06,726532.877567,1.746679e+06,1.746317e+06,41.185531,2.121098e+06,267.085699,0.0,0.0,1.746584e+06,8707.858452,39678.615408,10919.221232,1.748393e+06,1.107255e+07
1294,2023-02-23,97096.865918,17620.0,114716.865918,9.709687,1.7620,11.471687,-215500.870702,-75166.0,-290666.870702,-1.704131e+06,2.430653e+06,726521.405880,1.746584e+06,1.746544e+06,29.013807,2.041847e+06,261.570382,0.0,0.0,1.746805e+06,8736.872258,39940.185789,10919.221232,1.751442e+06,1.099280e+07
1295,2023-02-24,306543.247660,19646.0,326189.247660,30.654325,1.9646,32.618925,-519742.057446,-97544.0,-617286.057446,-1.704162e+06,2.430651e+06,726488.786956,1.746805e+06,1.746705e+06,67.881098,2.368198e+06,293.813392,0.0,0.0,1.746999e+06,8804.753357,40233.999182,10919.221232,1.751205e+06,1.131867e+07
1296,2023-02-27,483284.308245,4665.0,487949.308245,48.328431,0.4665,48.794931,-38227.898496,-86690.0,-124917.898496,-1.706434e+06,2.435330e+06,728896.387030,1.746999e+06,1.749401e+06,4.975994,1.880488e+06,232.477988,0.0,0.0,1.749634e+06,8809.729350,40466.477170,10919.221232,1.755803e+06,1.083044e+07


In [77]:
PL_dfs['trade_2'][-2:]

Unnamed: 0,date,gross_stock_trades,gross_option_trades,gross_trades_value,stock_trading_costs,option_trading_costs,net_trading_costs,stock_pos_value,option_pos_value,gross_pos_value,stock_PL,option_PL,net_PL,start_cash,initial_kapital,short_fee,initial_cash,interest,lever_cash,leverage_fee,end_kapital,net_short_fees,net_interest_paid,net_interest_earned,net_pos_value,tot_cash
1296,2023-02-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,797173.542235,-1146922.0,-349748.667365,683225.069753,683225.069753,-0.0,683225.069753,84.464651,0.0,0.0,683309.534404,4719.494988,39039.947929,1262.251172,683309.534404,9612474.0
1297,2023-02-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,797173.542235,-1146922.0,-349748.667365,683309.534404,683309.534404,-0.0,683309.534404,82.581537,0.0,0.0,683392.115941,4719.494988,39122.529466,1262.251172,683392.115941,9612391.0


In [78]:
PL_dfs['trade_1']['net_pos_value'][PL_dfs['trade_1']['net_pos_value'].idxmin()]

868579.2717882802

In [79]:
PL_dfs['trade_2']['net_pos_value'][PL_dfs['trade_2']['net_pos_value'].idxmin()]

596810.1024514363

In [80]:
PL_dfs['trade_3']['net_pos_value'][PL_dfs['trade_3']['net_pos_value'].idxmin()]

981452.1037887249

In [81]:
PL_dfs['trade_1']['tot_cash'][PL_dfs['trade_1']['tot_cash'].idxmin()]

2987330.136676361

In [82]:
PL_dfs['trade_1']['tot_cash'][PL_dfs['trade_1']['tot_cash'].idxmax()]

15620853.359602839

In [83]:
PL_dfs['trade_2']['tot_cash'][PL_dfs['trade_2']['tot_cash'].idxmin()]

2468237.6778471903

In [84]:
PL_dfs['trade_2']['tot_cash'][PL_dfs['trade_2']['tot_cash'].idxmax()]

15140576.721228467

In [85]:
PL_dfs['trade_3']['tot_cash'][PL_dfs['trade_3']['tot_cash'].idxmin()]

5649798.585902663

In [86]:
PL_dfs['trade_3']['tot_cash'][PL_dfs['trade_3']['tot_cash'].idxmax()]

15511034.440640306

In [89]:
PL_dfs['trade_2'][-2:]

Unnamed: 0,date,gross_stock_trades,gross_option_trades,gross_trades_value,stock_trading_costs,option_trading_costs,net_trading_costs,stock_pos_value,option_pos_value,gross_pos_value,stock_PL,option_PL,net_PL,start_cash,initial_kapital,short_fee,initial_cash,interest,lever_cash,leverage_fee,end_kapital,net_short_fees,net_interest_paid,net_interest_earned,net_pos_value,tot_cash
1296,2023-02-27,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,797173.542235,-1146922.0,-349748.667365,683225.069753,683225.069753,-0.0,683225.069753,84.464651,0.0,0.0,683309.534404,4719.494988,39039.947929,1262.251172,683309.534404,9612474.0
1297,2023-02-28,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,797173.542235,-1146922.0,-349748.667365,683309.534404,683309.534404,-0.0,683309.534404,82.581537,0.0,0.0,683392.115941,4719.494988,39122.529466,1262.251172,683392.115941,9612391.0


In [88]:
'''
os.makedirs('simdata', exist_ok=True)

for strat, df in PL_dfs.items():
    csv_path = f'simdata/PL_{strat}.csv'
    df.to_csv(csv_path, index=False)
'''

"\nos.makedirs('simdata', exist_ok=True)\n\nfor strat, df in PL_dfs.items():\n    csv_path = f'simdata/PL_{strat}.csv'\n    df.to_csv(csv_path, index=False)\n"