In [1]:
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from datetime import datetime, timedelta

# Strategy: Buy SPY on the 5th consecutive all-time high 
# hold for specified number of days

## Specify parameters for analysis

In [2]:
holding_period = 5
start_date_string = "1993-02-01"
end_date_string = pd.Timestamp.today().strftime("%Y-%m-%d")

## Get data

In [3]:
# Get daily price data
spy = yf.Ticker("SPY")
daily_ohlc = spy.history(start=start_date_string, end=end_date_string, interval="1D")

  df.index += _pd.TimedeltaIndex(dst_error_hours, 'h')


In [4]:
daily_ohlc

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1993-02-01 00:00:00-05:00,24.858344,25.017353,24.858344,25.017353,480500,0.0,0.0,0.0
1993-02-02 00:00:00-05:00,24.999690,25.088028,24.946687,25.070360,201300,0.0,0.0,0.0
1993-02-03 00:00:00-05:00,25.105701,25.353048,25.088034,25.335381,529400,0.0,0.0,0.0
1993-02-04 00:00:00-05:00,25.423697,25.494367,25.141015,25.441364,531500,0.0,0.0,0.0
1993-02-05 00:00:00-05:00,25.423710,25.476713,25.282369,25.423710,492100,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
2024-01-18 00:00:00-05:00,474.010010,477.059998,472.420013,476.489990,91856200,0.0,0.0,0.0
2024-01-19 00:00:00-05:00,477.649994,482.720001,476.540009,482.429993,110733300,0.0,0.0,0.0
2024-01-22 00:00:00-05:00,484.010010,485.220001,482.779999,483.450012,75844900,0.0,0.0,0.0
2024-01-23 00:00:00-05:00,484.010010,485.109985,482.890015,484.859985,49945300,0.0,0.0,0.0


## Calculate running all-time high to date, and identify trade setups

In [5]:
daily_ohlc['cummax'] = daily_ohlc['Close'].cummax()

In [6]:
# Identify which days had all-time highs
daily_ohlc.loc[daily_ohlc['Close'] == daily_ohlc['cummax'], 'cummax_int'] = 1
daily_ohlc.loc[daily_ohlc['Close'] != daily_ohlc['cummax'], 'cummax_int'] = 0

In [7]:
# A buy setup is the fifth consecutive all-time high
daily_ohlc.loc[daily_ohlc['cummax_int'].rolling(5).sum() == 5, 'buy_setup_int'] = 1
daily_ohlc.loc[daily_ohlc['cummax_int'].rolling(5).sum() < 5.0, 'buy_setup_int'] = 0

In [8]:
# Set target position to 1 (100%) for the holding period after a buy setup
daily_ohlc.loc[daily_ohlc['buy_setup_int'].rolling(holding_period).sum() >= 1, 'target_position'] = 1.00
daily_ohlc.loc[daily_ohlc['buy_setup_int'].rolling(holding_period).sum() < 1, 'target_position'] = 0.00

In [9]:
daily_ohlc.loc[daily_ohlc['target_position'] == 1]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains,cummax,cummax_int,buy_setup_int,target_position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1993-08-20 00:00:00-04:00,26.135987,26.207495,26.118110,26.207495,80700,0.0,0.0,0.0,26.207495,1.0,1.0,1.0
1993-08-23 00:00:00-04:00,26.100241,26.171748,26.100241,26.153872,15600,0.0,0.0,0.0,26.207495,0.0,0.0,1.0
1993-08-24 00:00:00-04:00,26.153871,26.439901,26.153871,26.439901,273400,0.0,0.0,0.0,26.439901,1.0,0.0,1.0
1993-08-25 00:00:00-04:00,26.439889,26.565027,26.404135,26.457766,242300,0.0,0.0,0.0,26.457766,1.0,0.0,1.0
1993-08-26 00:00:00-04:00,26.475653,26.618668,26.368391,26.475653,120000,0.0,0.0,0.0,26.475653,1.0,0.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
2021-11-08 00:00:00-05:00,453.687984,454.199915,452.239118,452.944214,50405200,0.0,0.0,0.0,452.944214,1.0,1.0,1.0
2021-11-09 00:00:00-05:00,453.320951,453.562428,449.998217,451.447083,51149100,0.0,0.0,0.0,452.944214,0.0,0.0,1.0
2021-11-10 00:00:00-05:00,449.708392,451.447047,446.289091,447.815216,69429700,0.0,0.0,0.0,452.944214,0.0,0.0,1.0
2021-11-11 00:00:00-05:00,449.351026,449.428316,447.940806,447.960114,34848500,0.0,0.0,0.0,452.944214,0.0,0.0,1.0


## Calculate net changes of entire time period (population) and of trades (sample)

In [10]:
# Select the days with buy or sell setups
transactions = daily_ohlc.loc[
    ((daily_ohlc['buy_setup_int'] == 1) & (daily_ohlc['buy_setup_int'].shift(1) == 0))
    | ((daily_ohlc['buy_setup_int'] == 0) & (daily_ohlc['buy_setup_int'].shift(1) == 1))][['Close', 'buy_setup_int']]

In [11]:
transactions['exit_close'] = transactions['Close'].shift(-1)
transactions['y_log_return'] = np.log(transactions['Close'].shift(-1)) - np.log(transactions['Close'])

In [12]:
transactions

Unnamed: 0_level_0,Close,buy_setup_int,exit_close,y_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1993-08-20 00:00:00-04:00,26.207495,1.0,26.153872,-0.002048
1993-08-23 00:00:00-04:00,26.153872,0.0,28.738445,0.094239
1995-02-08 00:00:00-05:00,28.738445,1.0,28.94297,0.007092
1995-02-16 00:00:00-05:00,28.94297,0.0,31.726366,0.091821
1995-05-16 00:00:00-04:00,31.726366,1.0,31.604837,-0.003838
1995-05-17 00:00:00-04:00,31.604837,0.0,34.63007,0.091412
1995-09-08 00:00:00-04:00,34.63007,1.0,35.35371,0.020681
1995-09-15 00:00:00-04:00,35.35371,0.0,46.076611,0.264902
1996-11-19 00:00:00-05:00,46.076611,1.0,46.086266,0.00021
1996-11-21 00:00:00-05:00,46.086266,0.0,48.951431,0.060314


In [13]:
trades = transactions.loc[transactions['buy_setup_int'] == 1]

In [14]:
trades

Unnamed: 0_level_0,Close,buy_setup_int,exit_close,y_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1993-08-20 00:00:00-04:00,26.207495,1.0,26.153872,-0.002048
1995-02-08 00:00:00-05:00,28.738445,1.0,28.94297,0.007092
1995-05-16 00:00:00-04:00,31.726366,1.0,31.604837,-0.003838
1995-09-08 00:00:00-04:00,34.63007,1.0,35.35371,0.020681
1996-11-19 00:00:00-05:00,46.076611,1.0,46.086266,0.00021
1997-01-22 00:00:00-05:00,48.951431,1.0,48.272343,-0.01397
1997-06-12 00:00:00-04:00,55.449223,1.0,55.85825,0.00735
1998-03-20 00:00:00-05:00,69.471352,1.0,69.313278,-0.002278
2006-11-20 00:00:00-05:00,100.664398,1.0,100.556923,-0.001068
2007-02-07 00:00:00-05:00,104.618706,1.0,104.481781,-0.00131


In [15]:
# Get log returns for the entire population based on the holding period
population = daily_ohlc.iloc[::holding_period][['Close', 'buy_setup_int']]

In [16]:
# Repeat the same calculations for the population
population['exit_close'] = population['Close'].shift(-1)
population['y_log_return'] = np.log(population['Close'].shift(-1)) - np.log(population['Close'])

In [17]:
population

Unnamed: 0_level_0,Close,buy_setup_int,exit_close,y_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1993-02-01 00:00:00-05:00,25.017353,,25.423710,0.016112
1993-02-08 00:00:00-05:00,25.423710,0.0,24.575674,-0.033925
1993-02-16 00:00:00-05:00,24.575674,0.0,24.699341,0.005019
1993-02-23 00:00:00-05:00,24.699341,0.0,25.406034,0.028210
1993-03-02 00:00:00-05:00,25.406034,0.0,25.777058,0.014498
...,...,...,...,...
2023-12-22 00:00:00-05:00,473.649994,0.0,472.649994,-0.002113
2024-01-02 00:00:00-05:00,472.649994,0.0,473.880005,0.002599
2024-01-09 00:00:00-05:00,473.880005,0.0,472.290009,-0.003361
2024-01-17 00:00:00-05:00,472.290009,0.0,485.390015,0.027359


## Calculate backtest statistics

In [18]:
trades['y_log_return'].describe()

count    27.000000
mean      0.000948
std       0.007057
min      -0.013970
25%      -0.003121
50%      -0.001068
75%       0.004588
max       0.020681
Name: y_log_return, dtype: float64

In [19]:
print("The sample mean log return is ", trades['y_log_return'].mean())
print("The population mean log return is ", population['y_log_return'].mean())

The sample mean log return is  0.0009480565009756083
The population mean log return is  0.001900886548270566


In [20]:
print("Backtest sample statistics (log returns):")
print("Mean: ", trades['y_log_return'].mean())
print("Standard deviation: ", trades['y_log_return'].std())
print("N: ", trades['y_log_return'].count())
print("t: ", (trades['y_log_return'].mean() - population['y_log_return'].mean()) / (trades['y_log_return'].std() / (trades['y_log_return'].count() ** 0.5)))

Backtest sample statistics (log returns):
Mean:  0.0009480565009756083
Standard deviation:  0.007056811349865626
N:  27
t:  -0.7015987693611289


In [21]:
trades.iloc[-10:]

Unnamed: 0_level_0,Close,buy_setup_int,exit_close,y_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2017-10-04 00:00:00-04:00,227.790451,1.0,228.879166,0.004768
2017-10-20 00:00:00-04:00,231.344589,1.0,230.444778,-0.003897
2018-01-08 00:00:00-05:00,247.729858,1.0,247.910721,0.00073
2019-11-18 00:00:00-05:00,291.558624,1.0,291.474518,-0.000289
2019-12-18 00:00:00-05:00,298.632202,1.0,303.170929,0.015084
2020-08-27 00:00:00-04:00,330.458984,1.0,331.388733,0.00281
2021-04-13 00:00:00-04:00,396.215881,1.0,394.862732,-0.003421
2021-06-30 00:00:00-04:00,412.147095,1.0,416.836029,0.011313
2021-08-16 00:00:00-04:00,430.354065,1.0,427.533051,-0.006577
2021-11-03 00:00:00-04:00,448.877716,1.0,451.447083,0.005708
