In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf

data = yf.download('AAPL', start='2015-01-01', end='2025-06-01')

data

[*********************100%***********************]  1 of 1 completed


Price,Close,High,Low,Open,Volume
Ticker,AAPL,AAPL,AAPL,AAPL,AAPL
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2015-01-02,24.288576,24.757330,23.848702,24.746222,212818400
2015-01-05,23.604334,24.137514,23.417722,24.057537,257142000
2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400
2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600
2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000
...,...,...,...,...,...
2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900
2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500
2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700
2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800


In [None]:
# Flatten the columns to just the second level (e.g., 'Price', 'Close', etc.)
data.columns = data.columns.get_level_values(0)
data

Price,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2015-01-02,24.288576,24.757330,23.848702,24.746222,212818400
2015-01-05,23.604334,24.137514,23.417722,24.057537,257142000
2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400
2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600
2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000
...,...,...,...,...,...
2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900
2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500
2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700
2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800


In [15]:
data.columns.name = None
data = data.reset_index()
data


Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2015-01-02,24.288576,24.757330,23.848702,24.746222,212818400
1,2015-01-05,23.604334,24.137514,23.417722,24.057537,257142000
2,2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400
3,2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600
4,2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000
...,...,...,...,...,...,...
2613,2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900
2614,2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500
2615,2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700
2616,2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800


In [19]:
import pandas_ta

data['garman_klass_vol'] = (((np.log(data['High']) - np.log(data['Low']))**2)/2) - ((2*np.log(2)-1)*(np.log(data['Close'])-np.log(data['Open']))**2)
data['rsi'] = pandas_ta.rsi(close=data['Close'], length=14)

bands = pandas_ta.bbands(close=np.log1p(data['Close']), length=20)
data['bb_low'] = bands['BBL_20_2.0']
data['bb_mid'] = bands['BBM_20_2.0']
data['bb_high'] = bands['BBU_20_2.0']


atr = pandas_ta.atr(high=data['High'], low=data['Low'], close=data['Close'], length=14)
data['atr'] = (atr - atr.mean()) / atr.std()

macd = pandas_ta.macd(close=data['Close'], length=20).iloc[:, 0]  # MACD line
data['macd'] = (macd - macd.mean()) / macd.std()


data['dollar_volume'] = (data['Close']*data['Volume'])/1e6

data

Unnamed: 0,Date,Close,High,Low,Open,Volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar_volume
0,2015-01-02,24.288576,24.757330,23.848702,24.746222,212818400,0.000564,,,,,,,5169.055909
1,2015-01-05,23.604334,24.137514,23.417722,24.057537,257142000,0.000319,0.000000,,,,,,6069.665622
2,2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400,0.000346,0.024996,,,,,,6212.972189
3,2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600,0.000087,3.876217,,,,,,3840.151246
4,2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000,0.000265,13.811156,,,,,,5902.568320
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2613,2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900,0.000209,38.828188,5.268682,5.330463,5.392244,2.188435,-0.697938,15315.592718
2614,2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500,0.000103,44.869601,5.266110,5.328120,5.390129,2.154876,-0.804277,11269.520963
2615,2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700,0.000099,45.117730,5.264224,5.325576,5.386927,2.021081,-0.873405,9086.982591
2616,2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800,0.000222,44.633524,5.262945,5.322612,5.382279,1.992867,-0.936953,10276.790003


In [27]:
spy = yf.download('SPY', start='2015-01-01', end='2025-06-01')
spy.columns = spy.columns.get_level_values(0)
spy.columns.name = None

spy = spy.reset_index()

spy


[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Date,Close,High,Low,Open,Volume
0,2015-01-02,171.567993,172.778993,170.524037,172.361410,121465900
1,2015-01-05,168.469589,170.682773,168.160582,170.515742,169632600
2,2015-01-06,166.882751,169.304726,166.080987,168.778568,209151400
3,2015-01-07,168.962311,169.304732,167.768030,168.219014,125346700
4,2015-01-08,171.960571,172.177722,170.365415,170.382109,147217800
...,...,...,...,...,...,...
2613,2025-05-23,577.403015,580.095069,573.903351,574.282236,76029000
2614,2025-05-27,589.407593,589.567094,576.725054,584.342549,72588500
2615,2025-05-28,585.997620,591.022803,585.259811,589.816348,68445500
2616,2025-05-29,588.310791,591.451531,584.342542,591.311929,69973300


In [30]:
data['returns'] = data['Close'].pct_change()
spy['returns'] = spy['Close'].pct_change()

# 2. Align both dataframes
returns = pd.DataFrame({
    'stock': data['returns'],
    'market': spy['returns']
}).dropna()

cov = np.cov(returns['stock'], returns['market'])[0, 1]
var = np.var(returns['market'])

beta = cov / var
data['beta'] = beta
data

Unnamed: 0,Date,Close,High,Low,Open,Volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar_volume,returns,beta
0,2015-01-02,24.288576,24.757330,23.848702,24.746222,212818400,0.000564,,,,,,,5169.055909,,1.217676
1,2015-01-05,23.604334,24.137514,23.417722,24.057537,257142000,0.000319,0.000000,,,,,,6069.665622,-0.028171,1.217676
2,2015-01-06,23.606558,23.866483,23.244438,23.668762,263188400,0.000346,0.024996,,,,,,6212.972189,0.000094,1.217676
3,2015-01-07,23.937571,24.037541,23.704304,23.815383,160423600,0.000087,3.876217,,,,,,3840.151246,0.014022,1.217676
4,2015-01-08,24.857315,24.915077,24.148629,24.266374,237458000,0.000265,13.811156,,,,,,5902.568320,0.038423,1.217676
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2613,2025-05-23,195.270004,197.699997,193.460007,193.669998,78432900,0.000209,38.828188,5.268682,5.330463,5.392244,2.188435,-0.697938,15315.592718,-0.030244,1.217676
2614,2025-05-27,200.210007,200.740005,197.429993,198.300003,56288500,0.000103,44.869601,5.266110,5.328120,5.390129,2.154876,-0.804277,11269.520963,0.025298,1.217676
2615,2025-05-28,200.419998,202.729996,199.899994,200.589996,45339700,0.000099,45.117730,5.264224,5.325576,5.386927,2.021081,-0.873405,9086.982591,0.001049,1.217676
2616,2025-05-29,199.949997,203.809998,198.509995,203.580002,51396800,0.000222,44.633524,5.262945,5.322612,5.382279,1.992867,-0.936953,10276.790003,-0.002345,1.217676
