## Working with S&P500 data

In [2]:
from statsmodels.regression.rolling import RollingOLS
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime as dt
import yfinance as yf
import pandas_ta
import warnings
warnings.filterwarnings("ignore")

sp500 = pd.read_html("https://en.wikipedia.org/wiki/List_of_S%26P_500_companies")[0]

sp500['Symbol'] = sp500['Symbol'].str.replace('.', '-')

symbols_list = sp500['Symbol'].unique().tolist()

end_date = '2024-02-29'

start_date = pd.to_datetime(end_date) - pd.DateOffset(years=8)

df = yf.download(tickers=symbols_list, start=start_date, end=end_date)

df = df.stack()

df.index.names = ['date', 'ticker']

df.columns = df.columns.str.lower()

df

[*********************100%%**********************]  503 of 503 completed


Unnamed: 0_level_0,Price,adj close,close,high,low,open,volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2016-02-29,A,35.004200,37.349998,37.700001,37.330002,37.590000,2187500.0
2016-02-29,AAL,39.288364,41.000000,41.340000,40.500000,40.770000,5791300.0
2016-02-29,AAPL,22.097691,24.172501,24.557501,24.162500,24.215000,140865200.0
2016-02-29,ABBV,38.690914,54.610001,56.000000,54.470001,55.869999,8395100.0
2016-02-29,ABT,33.398457,38.740002,39.840000,38.740002,39.340000,8463400.0
...,...,...,...,...,...,...,...
2024-02-28,XYL,125.629997,125.629997,126.309998,125.260002,125.260002,877000.0
2024-02-28,YUM,138.600006,138.600006,138.770004,136.460007,136.850006,1417500.0
2024-02-28,ZBH,123.897018,124.129997,126.000000,123.709999,125.940002,1415700.0
2024-02-28,ZBRA,272.799988,272.799988,274.220001,270.829987,272.290009,212900.0


# Indicators used
- Garman-Klass volatility
- Relative Strength Index
- Bollinger Bands
- Average True Range
- Moving Average Convergence Divergence
- Dollar Volume

In [4]:
df['garman_klass_vol'] = ((np.log(df['high']) - np.log(df['low']))**2)/2 - (2*np.log(2) - 1)*(np.log(df['adj close']) - np.log(df['open']))**2

df['rsi'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.rsi(close=x, length=20))

df['bb_low'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.bbands(close=np.log1p(x), length=20).iloc[:, 0])

df['bb_mid'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.bbands(close=np.log1p(x), length=20).iloc[:, 1])

df['bb_high'] = df.groupby(level=1)['adj close'].transform(lambda x: pandas_ta.bbands(close=np.log1p(x), length=20).iloc[:, 2])

def compute_atr(stock_data):
    atr = pandas_ta.atr(high=stock_data['high'], low=stock_data['low'], close=stock_data['adj close'], length=14)

    return atr.sub(atr.mean()).div(atr.std())

df['atr'] = df.groupby(level=1, group_keys=False).apply(compute_atr)

def compute_macd(close):
    macd = pandas_ta.macd(close=close, length=20).iloc[:,0]

    return macd.sub(macd.mean()).div(macd.std())

df['macd'] = df.groupby(level=1, group_keys=False)['adj close'].apply(compute_macd)

df['dollar_volume'] = (df['adj close']*df['volume'])/1e6

df

Unnamed: 0_level_0,Price,adj close,close,high,low,open,volume,garman_klass_vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar_volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2016-02-29,A,35.004200,37.349998,37.700001,37.330002,37.590000,2187500.0,-0.001914,,,,,,,76.571687
2016-02-29,AAL,39.288364,41.000000,41.340000,40.500000,40.770000,5791300.0,-0.000319,,,,,,,227.530705
2016-02-29,AAPL,22.097691,24.172501,24.557501,24.162500,24.215000,140865200.0,-0.003103,,,,,,,3112.795603
2016-02-29,ABBV,38.690914,54.610001,56.000000,54.470001,55.869999,8395100.0,-0.051766,,,,,,,324.814093
2016-02-29,ABT,33.398457,38.740002,39.840000,38.740002,39.340000,8463400.0,-0.009964,,,,,,,282.664497
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-02-28,XYL,125.629997,125.629997,126.309998,125.260002,125.260002,877000.0,0.000031,68.939503,4.739612,4.809591,4.879570,-2.277383,1.824159,110.177508
2024-02-28,YUM,138.600006,138.600006,138.770004,136.460007,136.850006,1417500.0,0.000079,66.990553,4.833574,4.893355,4.953135,-2.347563,1.543415,196.465509
2024-02-28,ZBH,123.897018,124.129997,126.000000,123.709999,125.940002,1415700.0,0.000065,50.883871,4.807892,4.839796,4.871700,-1.690567,0.399174,175.401009
2024-02-28,ZBRA,272.799988,272.799988,274.220001,270.829987,272.290009,212900.0,0.000076,56.634120,5.447085,5.563840,5.680595,0.354187,0.809567,58.079117
