In [4]:
from statsmodels.regression.rolling import RollingOLS
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime as dt
import yfinance as yf
import pandas_ta as ta
import warnings

# Suppress warnings
warnings.filterwarnings('ignore')

#pip install statsmodels pandas-datareader matplotlib pandas numpy datetime yfinance pandas_ta

In [2]:
# Set up dataframe with S&P500 stocks
SP500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]

SP500['Symbol'] = SP500['Symbol'].str.replace('.', '-')
symbols_list = SP500['Symbol'].unique().tolist()

end_date = '2024-12-01'
start_date = pd.to_datetime(end_date) - pd.DateOffset(365 * 8)  # Eight years of time

df = yf.download(tickers=symbols_list, 
                 start=start_date,
                 end=end_date).stack()
# Stack makes sure each row is unique, pivots columns into a multi-level index

df.index.names = ['date', 'ticker']
df.columns = df.columns.str.lower()  # Convert all column names to lower case

# Uncomment the line below if you want to see the dataframe
# print(df)


[*********************100%***********************]  503 of 503 completed


In [7]:
# Garman-Klass volatility, estimates volatility using FOUR key metrics
# High: Highest price of asset during period
# Low: Lowest price of asset during period
# Close: Closing price of asset
# Open: Opening price of asset
df['garman_klass_vol'] = ((np.log(df['high']) - np.log(df['low'])) ** 2) / 2 - ((2 * np.log(2) - 1) * (np.log(df['adj close']) - np.log(df['open'])) ** 2)
print(df)

#Relative Strength Index (RSI), used to measure speed and change of price movements of a stock from 0 to 100

df['rsi'] = df.groupby(level=1)['adj close'].transform(lambda x: ta.rsi(close=x, length=20))


Price               adj close       close        high         low        open  \
date       ticker                                                               
2016-12-05 A        41.855881   44.529999   44.689999   44.209999   44.209999   
           AAPL     25.262001   27.277500   27.507500   27.062500   27.500000   
           ABBV     43.165928   60.860001   61.220001   59.570000   59.599998   
           ABT      33.251614   38.430000   38.450001   37.990002   38.110001   
           ACGL     26.609310   27.983334   27.986668   27.553333   27.683332   
...                       ...         ...         ...         ...         ...   
2024-11-29 XYL     126.750000  126.750000  127.830002  126.070000  126.389999   
           YUM     138.270004  138.940002  139.350006  138.289993  139.039993   
           ZBH     112.099998  112.099998  112.419998  111.410004  111.839996   
           ZBRA    407.000000  407.000000  409.029999  405.510010  406.640015   
           ZTS     175.25000