In [1]:
import numpy as np
from openbb import obb
obb.user.preferences.output_type = "dataframe"
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from datetime import datetime, timedelta

# Strategy: Buy SPY if it had a big decline on Tuesday 
Hold the position for a specified number of trading days.

## Specify parameters for analysis

In [2]:
# Number of daily rows (trading sessions) a position is held after a buy setup
holding_period = 4

# Backtest window: fixed start date, end date is today's date as YYYY-MM-DD
start_date = "2020-02-01"
end_date = f"{pd.Timestamp.today():%Y-%m-%d}"

## Get data

In [3]:
# Get daily price data
# NOTE: the chained assignment binds both `daily_ohlc` and `spy` to the same
# object, so columns added in place to `daily_ohlc` are also visible via `spy`.
daily_ohlc = spy = obb.equity.price.historical(
    "SPY",
    start_date = start_date,
    end_date = end_date,
    provider="cboe"     # 20250514 openbb calls using yfinance are broken
)
# Parse the index as datetimes and attach the US/Eastern timezone.
# NOTE(review): tz_localize expects a timezone-naive index -- confirm the
# provider keeps returning naive dates.
daily_ohlc.index = pd.to_datetime(daily_ohlc.index).tz_localize("US/Eastern")

In [4]:
# Display the downloaded price frame as the cell output
daily_ohlc

Unnamed: 0_level_0,open,high,low,close,volume
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2020-02-03 00:00:00-05:00,323.35,326.16,323.22,324.12,66682806
2020-02-04 00:00:00-05:00,328.07,330.01,327.72,329.06,59155664
2020-02-05 00:00:00-05:00,332.29,333.09,330.67,332.86,62183365
2020-02-06 00:00:00-05:00,333.98,334.19,332.80,333.98,48000411
2020-02-07 00:00:00-05:00,332.81,333.99,331.60,332.20,61325887
...,...,...,...,...,...
2025-08-05 00:00:00-04:00,631.77,632.61,627.04,627.97,66894852
2025-08-06 00:00:00-04:00,629.09,633.44,628.13,632.78,62632558
2025-08-07 00:00:00-04:00,636.22,636.98,629.11,632.25,72870868
2025-08-08 00:00:00-04:00,634.15,637.65,633.74,637.18,61602384


## Identify trade setups

### Set up trade criteria

We want to evaluate Tuesdays on which SPY had a big loss, defined as a close-to-close decline at least as large as the previous day's 30-day average true range (ATR).

In [5]:
# True range of each session: the distance between the higher of (today's high,
# yesterday's close) and the lower of (today's low, yesterday's close).
# The first row has no previous close, so its true range is NaN.
prev_close = daily_ohlc['close'].shift(1)
range_top = np.maximum(daily_ohlc['high'], prev_close)
range_bottom = np.minimum(daily_ohlc['low'], prev_close)
daily_ohlc['true_range'] = range_top - range_bottom

# 30-day average true range (ATR): simple rolling mean of the true range,
# NaN until the window holds 30 true-range values
daily_ohlc['atr'] = daily_ohlc['true_range'].rolling(window=30).mean()

In [6]:
# A buy setup is a Tuesday (pandas dayofweek counts Monday as 0, so Tuesday is 1)
# whose close-to-close decline is at least as large as the previous session's
# 30-day ATR. The ATR is shifted by one row, so the threshold does not include
# the setup day's own true range.
close_change = daily_ohlc['close'] - daily_ohlc['close'].shift(1)
is_tuesday = daily_ohlc.index.dayofweek == 1
is_big_decline = close_change <= -daily_ohlc['atr'].shift(1)

# Flag column: 1 on setup days, 0 everywhere else (rows where the change or
# the lagged ATR is NaN compare as False and stay 0)
daily_ohlc['buy_setup_int'] = 0
daily_ohlc.loc[is_tuesday & is_big_decline, 'buy_setup_int'] = 1

In [7]:
# Show the 20 most recent rows to inspect the true_range, atr and buy_setup_int columns
daily_ohlc.iloc[-20:]

Unnamed: 0_level_0,open,high,low,close,volume,true_range,atr,buy_setup_int
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-07-15 00:00:00-04:00,627.84,627.86,622.06,622.14,72082794,5.8,5.351667,0
2025-07-16 00:00:00-04:00,623.76,624.73,618.05,624.22,88025247,6.68,5.316667,0
2025-07-17 00:00:00-04:00,624.42,628.4,624.18,628.04,68120945,4.22,5.283,0
2025-07-18 00:00:00-04:00,629.32,629.47,626.46,627.58,61048183,3.01,5.301333,0
2025-07-21 00:00:00-04:00,628.74,631.54,628.34,628.77,62246399,3.96,5.168333,0
2025-07-22 00:00:00-04:00,629.05,629.73,626.19,628.86,58980405,3.54,5.027,0
2025-07-23 00:00:00-04:00,631.53,634.21,629.73,634.21,69270588,5.35,5.113333,0
2025-07-24 00:00:00-04:00,634.65,636.15,633.99,634.42,70089283,2.16,5.039333,0
2025-07-25 00:00:00-04:00,635.12,637.58,634.84,637.1,55876018,3.16,4.951667,0
2025-07-28 00:00:00-04:00,637.8,638.04,635.54,636.94,54222011,2.5,4.894,0


### Set target position for each period

In [8]:
# Set target position to 1 (100%) on a buy-setup row and on the following
# holding_period - 1 rows, and to 0 otherwise. The rolling sum counts the
# setups within the last `holding_period` rows; it is NaN for the first
# holding_period - 1 rows, and those rows keep a NaN target position.
# The whole column is assigned in one step so that re-running this cell after
# changing `holding_period` cannot leave stale values from a previous run.
setups_in_window = daily_ohlc['buy_setup_int'].rolling(holding_period).sum()
daily_ohlc['target_position'] = (
    (setups_in_window >= 1).astype(float).where(setups_in_window.notna())
)

In [9]:
# Show the rows on which the strategy is in the market (target_position == 1)
daily_ohlc.loc[daily_ohlc['target_position'] == 1]

Unnamed: 0_level_0,open,high,low,close,volume,true_range,atr,buy_setup_int,target_position
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-09-08 00:00:00-04:00,336.74,342.64,332.88,333.21,110806934,9.76,4.243333,1,1.0
2020-09-09 00:00:00-04:00,337.52,342.46,336.61,339.79,89410368,9.25,4.458667,0,1.0
2020-09-10 00:00:00-04:00,341.75,342.53,332.85,333.89,89210400,9.68,4.629333,0,1.0
2020-09-11 00:00:00-04:00,335.79,336.97,331.00,334.06,82233711,5.97,4.645667,0,1.0
2021-05-11 00:00:00-04:00,413.12,415.27,410.06,414.21,115246459,7.88,3.709667,1,1.0
...,...,...,...,...,...,...,...,...,...
2024-09-06 00:00:00-04:00,549.88,551.60,539.44,540.36,66093048,12.16,8.696000,0,1.0
2025-01-07 00:00:00-05:00,597.39,597.75,586.78,588.63,58722623,10.97,6.331333,1,1.0
2025-01-08 00:00:00-05:00,588.77,590.58,585.20,589.49,45818084,5.38,6.322000,0,1.0
2025-01-10 00:00:00-05:00,585.80,585.95,578.55,580.49,72156027,10.94,6.560000,0,1.0


## Calculate log returns of the entire time period (population) and of the trades (sample)

In [10]:
# Select the days on which the target position changes:
#   0 -> 1 is an entry, 1 -> 0 is an exit.
# Rows whose current or previous target position is NaN never match.
position = daily_ohlc['target_position']
previous_position = position.shift(1)
is_entry = (position == 1) & (previous_position == 0)
is_exit = (position == 0) & (previous_position == 1)

# One .loc call (no chained indexing) plus an explicit copy: new columns are
# added to `transactions` afterwards, and it must be independent of `daily_ohlc`.
transactions = daily_ohlc.loc[is_entry | is_exit, ['close', 'buy_setup_int']].copy()

In [11]:
# Forward log return of each transaction row: from this row's close to the
# close of the next transaction row (NaN on the last row, which has no successor)
next_close = transactions['close'].shift(-1)
transactions['exit_close'] = next_close
transactions['y_log_return'] = np.log(next_close) - np.log(transactions['close'])

In [12]:
# Inspect the position-change rows. On rows with buy_setup_int == 0 (exits),
# y_log_return spans the period until the next entry, not a trade.
transactions

Unnamed: 0_level_0,close,buy_setup_int,exit_close,y_log_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-09-08 00:00:00-04:00,333.21,1,338.46,0.015633
2020-09-14 00:00:00-04:00,338.46,0,414.21,0.201967
2021-05-11 00:00:00-04:00,414.21,1,415.52,0.003158
2021-05-17 00:00:00-04:00,415.52,0,433.72,0.042868
2021-09-28 00:00:00-04:00,433.72,1,428.64,-0.011782
2021-10-04 00:00:00-04:00,428.64,0,455.56,0.06091
2021-11-30 00:00:00-05:00,455.56,1,458.79,0.007065
2021-12-06 00:00:00-05:00,458.79,0,456.49,-0.005026
2022-01-18 00:00:00-05:00,456.49,1,439.84,-0.037156
2022-01-24 00:00:00-05:00,439.84,0,416.1,-0.055485


In [13]:
# Keep only the entry rows (buy_setup_int == 1) and discard any row containing
# NaN, e.g. a final entry that has no following exit row yet
entry_rows = transactions['buy_setup_int'].eq(1)
trades = transactions[entry_rows].dropna()

In [14]:
# Display the trades: one row per entry with its exit close and log return
trades

Unnamed: 0_level_0,close,buy_setup_int,exit_close,y_log_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-09-08 00:00:00-04:00,333.21,1,338.46,0.015633
2021-05-11 00:00:00-04:00,414.21,1,415.52,0.003158
2021-09-28 00:00:00-04:00,433.72,1,428.64,-0.011782
2021-11-30 00:00:00-05:00,455.56,1,458.79,0.007065
2022-01-18 00:00:00-05:00,456.49,1,439.84,-0.037156
2022-04-26 00:00:00-04:00,416.1,1,414.48,-0.003901
2022-09-13 00:00:00-04:00,393.1,1,388.55,-0.011642
2023-02-21 00:00:00-05:00,399.09,1,397.73,-0.003414
2023-03-07 00:00:00-05:00,398.27,1,385.36,-0.032952
2023-04-25 00:00:00-04:00,406.08,1,415.51,0.022956


In [15]:
# Get log returns for the entire population based on the holding period:
# keep every `holding_period`-th row (starting with the first row) so that
# consecutive rows are exactly one holding period apart.
# Columns are selected first and the result is copied explicitly, because new
# columns are added to `population` afterwards.
population = daily_ohlc[['close', 'buy_setup_int']].iloc[::holding_period].copy()

In [16]:
# Forward log return over one holding period: from this sampled close to the
# next sampled close (NaN on the last row, which has no successor)
next_sampled_close = population['close'].shift(-1)
population['exit_close'] = next_sampled_close
population['y_log_return'] = np.log(next_sampled_close) - np.log(population['close'])

In [17]:
# Display the non-overlapping holding-period returns of the whole sample window
population

Unnamed: 0_level_0,close,buy_setup_int,exit_close,y_log_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2020-02-03 00:00:00-05:00,324.12,0,332.20,0.024623
2020-02-07 00:00:00-05:00,332.20,0,337.06,0.014524
2020-02-13 00:00:00-05:00,337.06,0,336.95,-0.000326
2020-02-20 00:00:00-05:00,336.95,0,311.50,-0.078535
2020-02-26 00:00:00-05:00,311.50,0,300.24,-0.036817
...,...,...,...,...
2025-07-17 00:00:00-04:00,628.04,0,634.21,0.009776
2025-07-23 00:00:00-04:00,634.21,0,635.26,0.001654
2025-07-29 00:00:00-04:00,635.26,0,631.17,-0.006459
2025-08-04 00:00:00-04:00,631.17,0,637.18,0.009477


## Calculate backtest statistics

In [18]:
# Summary statistics of the per-trade log returns
trades['y_log_return'].describe()

count    20.000000
mean      0.000417
std       0.017285
min      -0.037156
25%      -0.010493
50%       0.003255
75%       0.009689
max       0.028651
Name: y_log_return, dtype: float64

In [19]:
# Compare the average trade return with the average holding-period return
# of the whole window
sample_mean = trades['y_log_return'].mean()
population_mean = population['y_log_return'].mean()
print("The sample mean log return is ", sample_mean)
print("The population mean log return is ", population_mean)

The sample mean log return is  0.00041673408930029685
The population mean log return is  0.0019479492006664362


In [20]:
# One-sample t statistic: distance of the mean trade return from the population
# mean, measured in standard errors of the sample mean (std / sqrt(N))
trade_returns = trades['y_log_return']
sample_mean = trade_returns.mean()
sample_std = trade_returns.std()
sample_size = trade_returns.count()
benchmark_mean = population['y_log_return'].mean()
t_stat = (sample_mean - benchmark_mean) / (sample_std / (sample_size ** 0.5))

print("Backtest sample statistics (log returns):")
print("Mean: ", sample_mean)
print("Standard deviation: ", sample_std)
print("N: ", sample_size)
print("t: ", t_stat)

Backtest sample statistics (log returns):
Mean:  0.00041673408930029685
Standard deviation:  0.017284888040443987
N:  20
t:  -0.396172780428568


In [21]:
# Show the ten most recent trades
trades.iloc[-10:]

Unnamed: 0_level_0,close,buy_setup_int,exit_close,y_log_return
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-05-02 00:00:00-04:00,410.84,1,412.74,0.004614
2023-05-23 00:00:00-04:00,414.09,1,420.18,0.0146
2023-08-15 00:00:00-04:00,442.89,1,439.34,-0.008048
2023-09-26 00:00:00-04:00,425.88,1,427.31,0.003352
2023-10-03 00:00:00-04:00,421.59,1,432.29,0.025063
2024-02-13 00:00:00-05:00,494.08,1,496.76,0.00541
2024-03-05 00:00:00-05:00,507.18,1,511.28,0.008051
2024-04-30 00:00:00-04:00,501.98,1,516.57,0.028651
2024-09-03 00:00:00-04:00,552.08,1,546.41,-0.010323
2025-01-07 00:00:00-05:00,588.63,1,582.19,-0.011001
