In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf

data = yf.download('AAPL', start='2015-01-01', end='2025-06-01')

data

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2015-01-02,24.288576,24.757330,23.848702,24.746222,212818400
2015-01-05,23.604334,24.137514,23.417722,24.057537,257142000
2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400
2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600
2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000
...,...,...,...,...,...
2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900
2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500
2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700
2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800


In [None]:
# Flatten the columns to just the second level (e.g., 'Price', 'Close', etc.)
data.columns = data.columns.get_level_values(0)
data

Price,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-02,24.288576,24.757330,23.848702,24.746222,212818400
2015-01-05,23.604334,24.137514,23.417722,24.057537,257142000
2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400
2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600
2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000
...,...,...,...,...,...
2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900
2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500
2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700
2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800


In [15]:
data.columns.name = None
data = data.reset_index()
data


Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2015-01-02,24.288576,24.757330,23.848702,24.746222,212818400
1,2015-01-05,23.604334,24.137514,23.417722,24.057537,257142000
2,2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400
3,2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600
4,2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000
...,...,...,...,...,...,...
2613,2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900
2614,2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500
2615,2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700
2616,2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800


In [19]:
import pandas_ta

data['garman_klass_vol'] = (((np.log(data['High']) - np.log(data['Low']))**2)/2) - ((2*np.log(2)-1)*(np.log(data['Close'])-np.log(data['Open']))**2)
data['rsi'] = pandas_ta.rsi(close=data['Close'], length=14)

bands = pandas_ta.bbands(close=np.log1p(data['Close']), length=20)
data['bb_low'] = bands['BBL_20_2.0']
data['bb_mid'] = bands['BBM_20_2.0']
data['bb_high'] = bands['BBU_20_2.0']


atr = pandas_ta.atr(high=data['High'], low=data['Low'], close=data['Close'], length=14)
data['atr'] = (atr - atr.mean()) / atr.std()

macd = pandas_ta.macd(close=data['Close'], length=20).iloc[:, 0]  # MACD line
data['macd'] = (macd - macd.mean()) / macd.std()


data['dollar_volume'] = (data['Close']*data['Volume'])/1e6

data

Unnamed: 0,Date,Close,High,Low,Open,Volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar_volume
0,2015-01-02,24.288576,24.757330,23.848702,24.746222,212818400,0.000564,,,,,,,5169.055909
1,2015-01-05,23.604334,24.137514,23.417722,24.057537,257142000,0.000319,0.000000,,,,,,6069.665622
2,2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400,0.000346,0.024996,,,,,,6212.972189
3,2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600,0.000087,3.876217,,,,,,3840.151246
4,2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000,0.000265,13.811156,,,,,,5902.568320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2613,2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900,0.000209,38.828188,5.268682,5.330463,5.392244,2.188435,-0.697938,15315.592718
2614,2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500,0.000103,44.869601,5.266110,5.328120,5.390129,2.154876,-0.804277,11269.520963
2615,2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700,0.000099,45.117730,5.264224,5.325576,5.386927,2.021081,-0.873405,9086.982591
2616,2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800,0.000222,44.633524,5.262945,5.322612,5.382279,1.992867,-0.936953,10276.790003


In [27]:
spy = yf.download('SPY', start='2015-01-01', end='2025-06-01')
spy.columns = spy.columns.get_level_values(0)
spy.columns.name = None

spy = spy.reset_index()

spy


[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2015-01-02,171.567993,172.778993,170.524037,172.361410,121465900
1,2015-01-05,168.469589,170.682773,168.160582,170.515742,169632600
2,2015-01-06,166.882751,169.304726,166.080987,168.778568,209151400
3,2015-01-07,168.962311,169.304732,167.768030,168.219014,125346700
4,2015-01-08,171.960571,172.177722,170.365415,170.382109,147217800
...,...,...,...,...,...,...
2613,2025-05-23,577.403015,580.095069,573.903351,574.282236,76029000
2614,2025-05-27,589.407593,589.567094,576.725054,584.342549,72588500
2615,2025-05-28,585.997620,591.022803,585.259811,589.816348,68445500
2616,2025-05-29,588.310791,591.451531,584.342542,591.311929,69973300


In [34]:
data['returns'] = data['Close'].pct_change()
spy['returns'] = spy['Close'].pct_change()
data['market returns'] = spy['returns']

window = 30
beta_values = [np.nan] * (window - 1)

returns = pd.DataFrame({
    'stock': data['returns'],
    'market': spy['returns']
}).dropna()

for i in range(window - 1, len(returns)):
    window_data = returns.iloc[i - window + 1 : i + 1]
    cov = np.cov(window_data['stock'], window_data['market'])[0, 1]
    var = np.var(window_data['market'])
    beta = cov / var if var != 0 else np.nan
    beta_values.append(beta)

beta_series = pd.Series(beta_values, index=returns.index)
data = data.reindex(beta_series.index)

data['beta'] = beta_series
data

Unnamed: 0,Date,Close,High,Low,Open,Volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar_volume,returns,beta,market returns
2,2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400,0.000346,0.024996,3.181122,3.241352,3.301583,-0.875314,0.229515,6212.972189,0.000094,,-0.009419
3,2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600,0.000087,3.876217,3.181122,3.241352,3.301583,-0.875314,0.229515,3840.151246,0.014022,,0.012461
4,2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000,0.000265,13.811156,3.181122,3.241352,3.301583,-0.875314,0.229515,5902.568320,0.038423,,0.017745
5,2015-01-09,24.883972,25.159448,24.484086,25.030596,214798000,0.000357,14.088308,3.181122,3.241352,3.301583,-0.875314,0.229515,5345.027454,0.001072,,-0.008014
6,2015-01-12,24.270807,25.021702,24.170837,25.015038,198603200,0.000246,13.048893,3.181122,3.241352,3.301583,-0.875314,0.229515,4820.259990,-0.024641,,-0.007833
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2613,2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900,0.000209,38.828188,5.268682,5.330463,5.392244,2.188435,-0.697938,15315.592718,-0.030244,1.421025,-0.006826
2614,2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500,0.000103,44.869601,5.266110,5.328120,5.390129,2.154876,-0.804277,11269.520963,0.025298,1.359656,0.020791
2615,2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700,0.000099,45.117730,5.264224,5.325576,5.386927,2.021081,-0.873405,9086.982591,0.001049,1.315832,-0.005785
2616,2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800,0.000222,44.633524,5.262945,5.322612,5.382279,1.992867,-0.936953,10276.790003,-0.002345,1.322565,0.003947


In [35]:
data = data.fillna(method='bfill')
data

  data = data.fillna(method='bfill')


Unnamed: 0,Date,Close,High,Low,Open,Volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar_volume,returns,beta,market returns
2,2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400,0.000346,0.024996,3.181122,3.241352,3.301583,-0.875314,0.229515,6212.972189,0.000094,0.871752,-0.009419
3,2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600,0.000087,3.876217,3.181122,3.241352,3.301583,-0.875314,0.229515,3840.151246,0.014022,0.871752,0.012461
4,2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000,0.000265,13.811156,3.181122,3.241352,3.301583,-0.875314,0.229515,5902.568320,0.038423,0.871752,0.017745
5,2015-01-09,24.883972,25.159448,24.484086,25.030596,214798000,0.000357,14.088308,3.181122,3.241352,3.301583,-0.875314,0.229515,5345.027454,0.001072,0.871752,-0.008014
6,2015-01-12,24.270807,25.021702,24.170837,25.015038,198603200,0.000246,13.048893,3.181122,3.241352,3.301583,-0.875314,0.229515,4820.259990,-0.024641,0.871752,-0.007833
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2613,2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900,0.000209,38.828188,5.268682,5.330463,5.392244,2.188435,-0.697938,15315.592718,-0.030244,1.421025,-0.006826
2614,2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500,0.000103,44.869601,5.266110,5.328120,5.390129,2.154876,-0.804277,11269.520963,0.025298,1.359656,0.020791
2615,2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700,0.000099,45.117730,5.264224,5.325576,5.386927,2.021081,-0.873405,9086.982591,0.001049,1.315832,-0.005785
2616,2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800,0.000222,44.633524,5.262945,5.322612,5.382279,1.992867,-0.936953,10276.790003,-0.002345,1.322565,0.003947


In [41]:
data['alpha'] = data['returns'] - data['beta'] * data['market returns']
data

Unnamed: 0,Date,Close,High,Low,Open,Volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar_volume,returns,beta,market returns,alpha
2,2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400,0.000346,0.024996,3.181122,3.241352,3.301583,-0.875314,0.229515,6212.972189,0.000094,0.871752,-0.009419,0.008305
3,2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600,0.000087,3.876217,3.181122,3.241352,3.301583,-0.875314,0.229515,3840.151246,0.014022,0.871752,0.012461,0.003159
4,2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000,0.000265,13.811156,3.181122,3.241352,3.301583,-0.875314,0.229515,5902.568320,0.038423,0.871752,0.017745,0.022953
5,2015-01-09,24.883972,25.159448,24.484086,25.030596,214798000,0.000357,14.088308,3.181122,3.241352,3.301583,-0.875314,0.229515,5345.027454,0.001072,0.871752,-0.008014,0.008058
6,2015-01-12,24.270807,25.021702,24.170837,25.015038,198603200,0.000246,13.048893,3.181122,3.241352,3.301583,-0.875314,0.229515,4820.259990,-0.024641,0.871752,-0.007833,-0.017812
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2613,2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900,0.000209,38.828188,5.268682,5.330463,5.392244,2.188435,-0.697938,15315.592718,-0.030244,1.421025,-0.006826,-0.020545
2614,2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500,0.000103,44.869601,5.266110,5.328120,5.390129,2.154876,-0.804277,11269.520963,0.025298,1.359656,0.020791,-0.002970
2615,2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700,0.000099,45.117730,5.264224,5.325576,5.386927,2.021081,-0.873405,9086.982591,0.001049,1.315832,-0.005785,0.008662
2616,2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800,0.000222,44.633524,5.262945,5.322612,5.382279,1.992867,-0.936953,10276.790003,-0.002345,1.322565,0.003947,-0.007566


In [46]:
risk_free_rate = 0
rolling_mean = data['returns'].rolling(window).mean() - risk_free_rate
rolling_std = data['returns'].rolling(window).std()
data['rolling_sharpe'] = rolling_mean / rolling_std


def downside_std(returns):
    negative_returns = returns[returns < 0]
    return negative_returns.std()

data['downside_std'] = data['returns'].rolling(window).apply(downside_std, raw=False)
data['rolling_sortino'] = rolling_mean / data['downside_std']

data['rolling_volatility'] = data['returns'].rolling(window).std()

data['momentum_20'] = data['Close'] / data['Close'].shift(20) - 1

data['excess_return'] = data['returns'] - data['market returns']

data['adx'] = pandas_ta.adx(high=data['High'], low=data['Low'], close=data['Close'], length=14)['ADX_14']

data

Unnamed: 0,Date,Close,High,Low,Open,Volume,garman_klass_vol,rsi,bb_low,bb_mid,...,beta,market returns,alpha,rolling_sharpe,downside_std,rolling_sortino,rolling_volatility,momentum_20,excess_return,adx
2,2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400,0.000346,0.024996,3.181122,3.241352,...,0.871752,-0.009419,0.008305,,,,,,0.009513,
3,2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600,0.000087,3.876217,3.181122,3.241352,...,0.871752,0.012461,0.003159,,,,,,0.001561,
4,2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000,0.000265,13.811156,3.181122,3.241352,...,0.871752,0.017745,0.022953,,,,,,0.020677,
5,2015-01-09,24.883972,25.159448,24.484086,25.030596,214798000,0.000357,14.088308,3.181122,3.241352,...,0.871752,-0.008014,0.008058,,,,,,0.009086,
6,2015-01-12,24.270807,25.021702,24.170837,25.015038,198603200,0.000246,13.048893,3.181122,3.241352,...,0.871752,-0.007833,-0.017812,,,,,,-0.016808,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2613,2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900,0.000209,38.828188,5.268682,5.330463,...,1.421025,-0.006826,-0.020545,0.049925,0.013772,0.081828,0.022573,-0.065720,-0.023419,14.959273
2614,2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500,0.000103,44.869601,5.266110,5.328120,...,1.359656,0.020791,-0.002970,0.028293,0.013772,0.044806,0.021811,-0.046005,0.004508,15.366639
2615,2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700,0.000099,45.117730,5.264224,5.325576,...,1.315832,-0.005785,0.008662,-0.003876,0.013772,-0.006033,0.021433,-0.049842,0.006834,15.352194
2616,2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800,0.000222,44.633524,5.262945,5.322612,...,1.322565,0.003947,-0.007566,-0.004605,0.013740,-0.007184,0.021434,-0.057825,-0.006292,15.539595


In [48]:
data = data.drop(columns=['High', 'Low', 'Open', 'Volume','dollar_volume'])
data = data.fillna(method='bfill')
data

  data = data.fillna(method='bfill')


Unnamed: 0,Date,Close,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,returns,beta,market returns,alpha,rolling_sharpe,downside_std,rolling_sortino,rolling_volatility,momentum_20,excess_return,adx
2,2015-01-06,23.606558,0.000346,0.024996,3.181122,3.241352,3.301583,-0.875314,0.229515,0.000094,0.871752,-0.009419,0.008305,0.354553,0.011713,0.573686,0.018952,0.125165,0.009513,59.735629
3,2015-01-07,23.937571,0.000087,3.876217,3.181122,3.241352,3.301583,-0.875314,0.229515,0.014022,0.871752,0.012461,0.003159,0.354553,0.011713,0.573686,0.018952,0.125165,0.001561,59.735629
4,2015-01-08,24.857315,0.000265,13.811156,3.181122,3.241352,3.301583,-0.875314,0.229515,0.038423,0.871752,0.017745,0.022953,0.354553,0.011713,0.573686,0.018952,0.125165,0.020677,59.735629
5,2015-01-09,24.883972,0.000357,14.088308,3.181122,3.241352,3.301583,-0.875314,0.229515,0.001072,0.871752,-0.008014,0.008058,0.354553,0.011713,0.573686,0.018952,0.125165,0.009086,59.735629
6,2015-01-12,24.270807,0.000246,13.048893,3.181122,3.241352,3.301583,-0.875314,0.229515,-0.024641,0.871752,-0.007833,-0.017812,0.354553,0.011713,0.573686,0.018952,0.125165,-0.016808,59.735629
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2613,2025-05-23,195.270004,0.000209,38.828188,5.268682,5.330463,5.392244,2.188435,-0.697938,-0.030244,1.421025,-0.006826,-0.020545,0.049925,0.013772,0.081828,0.022573,-0.065720,-0.023419,14.959273
2614,2025-05-27,200.210007,0.000103,44.869601,5.266110,5.328120,5.390129,2.154876,-0.804277,0.025298,1.359656,0.020791,-0.002970,0.028293,0.013772,0.044806,0.021811,-0.046005,0.004508,15.366639
2615,2025-05-28,200.419998,0.000099,45.117730,5.264224,5.325576,5.386927,2.021081,-0.873405,0.001049,1.315832,-0.005785,0.008662,-0.003876,0.013772,-0.006033,0.021433,-0.049842,0.006834,15.352194
2616,2025-05-29,199.949997,0.000222,44.633524,5.262945,5.322612,5.382279,1.992867,-0.936953,-0.002345,1.322565,0.003947,-0.007566,-0.004605,0.013740,-0.007184,0.021434,-0.057825,-0.006292,15.539595


In [49]:
data = data.drop(columns=['Date'])