In [1]:
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
import pandas as pd
import statsmodels.api as sm
from datetime import datetime, timedelta

# Strategy: Buy when NVDA goes up 5% or more in a day;
# hold for specified number of days

## Specify parameters for analysis

In [2]:
holding_period = 5
start_date_string = "2021-12-31"
tomorrows_date = (datetime.today() + timedelta(1)).strftime('%Y-%m-%d')
# I overrode the end date for this run because there was an incomplete trade at the end
# end_date_string = tomorrows_date
end_date_string = "2024-01-05"

## Get data

In [3]:
# Get daily price data
nvda = yf.Ticker("NVDA")
daily_ohlc = nvda.history(start=start_date_string, end=end_date_string, interval="1D")

In [4]:
daily_ohlc

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2021-12-31 00:00:00-05:00,296.331277,299.886372,292.906009,293.704895,26653000,0.0,0.0
2022-01-03 00:00:00-05:00,297.739352,306.687003,297.439778,300.795135,39154700,0.0,0.0
2022-01-04 00:00:00-05:00,302.352952,304.260324,283.099509,292.496552,52715400,0.0,0.0
2022-01-05 00:00:00-05:00,289.091278,293.754859,274.950776,275.659821,49806400,0.0,0.0
2022-01-06 00:00:00-05:00,276.019282,283.988301,270.277202,281.391876,45418600,0.0,0.0
...,...,...,...,...,...,...,...
2023-12-28 00:00:00-05:00,496.429993,498.839996,494.119995,495.220001,24658700,0.0,0.0
2023-12-29 00:00:00-05:00,498.130005,499.970001,487.510010,495.220001,38869000,0.0,0.0
2024-01-02 00:00:00-05:00,492.440002,492.950012,475.950012,481.679993,41125400,0.0,0.0
2024-01-03 00:00:00-05:00,474.850006,481.839996,473.200012,475.690002,32089600,0.0,0.0


## Calculate daily net change, and identify trade setups

In [5]:
daily_ohlc['log_change'] = np.log(daily_ohlc['Close']) - np.log(daily_ohlc['Close'].shift(1))

In [6]:
# A buy setup is a 5% increase in one day
daily_ohlc.loc[daily_ohlc['log_change'] >= 0.05, 'buy_setup_int'] = 1
daily_ohlc.loc[daily_ohlc['log_change'] < 0.05, 'buy_setup_int'] = 0

In [7]:
daily_ohlc

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,log_change,buy_setup_int
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2021-12-31 00:00:00-05:00,296.331277,299.886372,292.906009,293.704895,26653000,0.0,0.0,,
2022-01-03 00:00:00-05:00,297.739352,306.687003,297.439778,300.795135,39154700,0.0,0.0,0.023854,0.0
2022-01-04 00:00:00-05:00,302.352952,304.260324,283.099509,292.496552,52715400,0.0,0.0,-0.027977,0.0
2022-01-05 00:00:00-05:00,289.091278,293.754859,274.950776,275.659821,49806400,0.0,0.0,-0.059285,0.0
2022-01-06 00:00:00-05:00,276.019282,283.988301,270.277202,281.391876,45418600,0.0,0.0,0.020581,0.0
...,...,...,...,...,...,...,...,...,...
2023-12-28 00:00:00-05:00,496.429993,498.839996,494.119995,495.220001,24658700,0.0,0.0,0.002122,0.0
2023-12-29 00:00:00-05:00,498.130005,499.970001,487.510010,495.220001,38869000,0.0,0.0,0.000000,0.0
2024-01-02 00:00:00-05:00,492.440002,492.950012,475.950012,481.679993,41125400,0.0,0.0,-0.027722,0.0
2024-01-03 00:00:00-05:00,474.850006,481.839996,473.200012,475.690002,32089600,0.0,0.0,-0.012514,0.0


In [8]:
# Set target position to 1 (100%) for the holding period after a buy setup
daily_ohlc.loc[daily_ohlc['buy_setup_int'].rolling(holding_period).sum() >= 1, 'target_position'] = 1.00
daily_ohlc.loc[daily_ohlc['buy_setup_int'].rolling(holding_period).sum() < 1, 'target_position'] = 0.00

In [9]:
daily_ohlc.iloc[150:190]

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Dividends,Stock Splits,log_change,buy_setup_int,target_position
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2022-08-08 00:00:00-04:00,174.845687,182.208342,172.24827,177.752777,98185900,0.0,0.0,-0.065055,0.0,0.0
2022-08-09 00:00:00-04:00,172.348185,174.29624,167.073445,170.689835,66826300,0.0,0.0,-0.040546,0.0,0.0
2022-08-10 00:00:00-04:00,176.803733,180.999547,172.99753,180.789764,59742200,0.0,0.0,0.057487,1.0,1.0
2022-08-11 00:00:00-04:00,181.12942,186.883693,178.581956,179.241302,50932700,0.0,0.0,-0.008602,0.0,1.0
2022-08-12 00:00:00-04:00,181.419134,186.993563,179.351188,186.903656,47809400,0.0,0.0,0.04186,0.0,1.0
2022-08-15 00:00:00-04:00,186.823731,191.449125,185.944618,190.130447,45766600,0.0,0.0,0.017117,0.0,1.0
2022-08-16 00:00:00-04:00,189.011555,191.229345,184.825725,188.601959,45124000,0.0,0.0,-0.008072,0.0,1.0
2022-08-17 00:00:00-04:00,185.045521,186.693887,181.269287,183.167404,44679900,0.0,0.0,-0.029238,0.0,0.0
2022-08-18 00:00:00-04:00,182.827714,188.462096,181.648897,187.543015,41544700,0.0,0.0,0.023608,0.0,0.0
2022-08-19 00:00:00-04:00,183.87667,185.015533,177.523004,178.312225,44158800,0.0,0.0,-0.050472,0.0,0.0


In [10]:
# Select the days with buy or sell setups
transactions = daily_ohlc.loc[
    ((daily_ohlc['buy_setup_int'] == 1) & (daily_ohlc['buy_setup_int'].shift(1) == 0))
    | ((daily_ohlc['buy_setup_int'] == 0) & (daily_ohlc['buy_setup_int'].shift(1) == 1))][['Close', 'buy_setup_int']]

In [11]:
transactions['exit_close'] = transactions['Close'].shift(-1)
transactions['y_log_return'] = np.log(transactions['Close'].shift(-1)) - np.log(transactions['Close'])

In [12]:
transactions

Unnamed: 0_level_0,Close,buy_setup_int,exit_close,y_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-31 00:00:00-05:00,244.522766,1.0,246.040665,0.006188
2022-02-01 00:00:00-05:00,246.040665,0.0,266.682159,0.080561
2022-02-09 00:00:00-05:00,266.682159,1.0,257.884308,-0.033546
2022-02-10 00:00:00-05:00,257.884308,0.0,264.585144,0.025652
2022-02-15 00:00:00-05:00,264.585144,1.0,264.744843,0.000603
...,...,...,...,...
2023-05-26 00:00:00-04:00,389.353363,0.0,437.455505,0.116488
2023-08-14 00:00:00-04:00,437.455505,1.0,439.325165,0.004265
2023-08-15 00:00:00-04:00,439.325165,0.0,469.590027,0.066620
2023-08-21 00:00:00-04:00,469.590027,1.0,456.602203,-0.028047


## Calculate net changes of entire time period (population) and of trades (sample)

In [13]:
trades = transactions.loc[transactions['buy_setup_int'] == 1]

In [14]:
trades

Unnamed: 0_level_0,Close,buy_setup_int,exit_close,y_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-01-31 00:00:00-05:00,244.522766,1.0,246.040665,0.006188
2022-02-09 00:00:00-05:00,266.682159,1.0,257.884308,-0.033546
2022-02-15 00:00:00-05:00,264.585144,1.0,264.744843,0.000603
2022-02-24 00:00:00-05:00,237.152908,1.0,241.237289,0.017076
2022-03-09 00:00:00-05:00,229.862183,1.0,226.306488,-0.01559
2022-03-15 00:00:00-04:00,229.452667,1.0,247.361038,0.075152
2022-03-18 00:00:00-04:00,264.210663,1.0,267.017273,0.010567
2022-03-24 00:00:00-04:00,281.160187,1.0,276.585693,-0.016404
2022-04-28 00:00:00-04:00,197.581207,1.0,185.246109,-0.064464
2022-05-02 00:00:00-04:00,195.094193,1.0,195.783386,0.003526


In [15]:
# Get log returns for the entire population based on the holding period
population = daily_ohlc.iloc[::holding_period][['Close', 'buy_setup_int']]

In [16]:
# Repeat the same calculations for the population
population['exit_close'] = population['Close'].shift(-1)
population['y_log_return'] = np.log(population['Close'].shift(-1)) - np.log(population['Close'])

In [17]:
population

Unnamed: 0_level_0,Close,buy_setup_int,exit_close,y_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-12-31 00:00:00-05:00,293.704895,,272.094727,-0.076425
2022-01-07 00:00:00-05:00,272.094727,0.0,269.048981,-0.011257
2022-01-14 00:00:00-05:00,269.048981,0.0,233.398102,-0.142148
2022-01-24 00:00:00-05:00,233.398102,0.0,244.522766,0.046563
2022-01-31 00:00:00-05:00,244.522766,1.0,246.939407,0.009835
...,...,...,...,...
2023-11-29 00:00:00-05:00,481.357666,0.0,455.029999,-0.056247
2023-12-06 00:00:00-05:00,455.029999,0.0,480.880005,0.055254
2023-12-13 00:00:00-05:00,480.880005,0.0,481.109985,0.000478
2023-12-20 00:00:00-05:00,481.109985,0.0,495.220001,0.028906


## Calculate backtest statistics

In [18]:
trades['y_log_return'].describe()

count    35.000000
mean      0.001908
std       0.032511
min      -0.070597
25%      -0.015846
50%       0.003526
75%       0.014763
max       0.076903
Name: y_log_return, dtype: float64

In [19]:
print("The sample mean log return is ", trades['y_log_return'].mean())
print("The population mean log return is ", population['y_log_return'].mean())

The sample mean log return is  0.0019077453098868752
The population mean log return is  0.005224266059430747


In [20]:
print("Backtest sample statistics (log returns):")
print("Mean: ", trades['y_log_return'].mean())
print("Standard deviation: ", trades['y_log_return'].std())
print("N: ", trades['y_log_return'].count())
print("t: ", (trades['y_log_return'].mean() - population['y_log_return'].mean()) / (trades['y_log_return'].std() / (trades['y_log_return'].count() ** 0.5)))

Backtest sample statistics (log returns):
Mean:  0.0019077453098868752
Standard deviation:  0.03251087812071903
N:  35
t:  -0.6035149614769291


In [21]:
trades.iloc[-30:]

Unnamed: 0_level_0,Close,buy_setup_int,exit_close,y_log_return
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2022-03-15 00:00:00-04:00,229.452667,1.0,247.361038,0.075152
2022-03-18 00:00:00-04:00,264.210663,1.0,267.017273,0.010567
2022-03-24 00:00:00-04:00,281.160187,1.0,276.585693,-0.016404
2022-04-28 00:00:00-04:00,197.581207,1.0,185.246109,-0.064464
2022-05-02 00:00:00-04:00,195.094193,1.0,195.783386,0.003526
2022-05-13 00:00:00-04:00,176.846268,1.0,172.43161,-0.02528
2022-05-17 00:00:00-04:00,181.550583,1.0,169.175537,-0.070597
2022-05-26 00:00:00-04:00,178.29451,1.0,186.494598,0.044966
2022-06-02 00:00:00-04:00,195.683487,1.0,186.97403,-0.045529
2022-06-24 00:00:00-04:00,171.089432,1.0,168.521988,-0.01512
