# Stock Market Analysis  

In [1]:
!pip install pandas numpy matplotlib statsmodels pandas-datareader datetime yfinance scikit-learn PyPortfolioOpt

Defaulting to user installation because normal site-packages is not writeable
Collecting statsmodels
  Downloading statsmodels-0.14.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting pandas-datareader
  Downloading pandas_datareader-0.10.0-py3-none-any.whl (109 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.5/109.5 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datetime
  Downloading DateTime-5.5-py3-none-any.whl (52 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting yfinance
  Downloading yfinance-0.2.54-py2.py3-none-any.whl (108 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.7/108.7 kB[0m 

In [2]:
!pip install pandas-ta


Defaulting to user installation because normal site-packages is not writeable
Collecting pandas-ta
  Downloading pandas_ta-0.3.14b.tar.gz (115 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: pandas-ta
[33m  DEPRECATION: pandas-ta is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559[0m[33m
[0m  Running setup.py install for pandas-ta ... [?25ldone
[?25hSuccessfully installed pandas-ta-0.3.14b0


## Data Collection & Fetching S&P 500 Data  


In [1]:
from statsmodels.regression.rolling import RollingOLS
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime as dt
import yfinance as yf
import pandas_ta
import warnings

warnings.filterwarnings('ignore')

snp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
snp500['Symbol'] = snp500['Symbol'].str.replace('.', '-')
symbols = snp500['Symbol'].unique().tolist()
endDate = '2025-03-19'
startDate = pd.to_datetime(endDate) - pd.DateOffset(365*8)
df = yf.download(tickers = symbols, start=startDate, end=endDate, auto_adjust=False).stack()
df.index.names = ['date', 'ticker']
df.columns = df.columns.str.lower()
df

[*********************100%***********************]  503 of 503 completed


Unnamed: 0_level_0,Price,adj close,close,high,low,open,volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-03-21,A,49.549419,52.660000,53.730000,52.480000,53.279999,1492700.0
2017-03-21,AAPL,32.481522,34.959999,35.700001,34.932499,35.527500,158119600.0
2017-03-21,ABBV,46.380817,65.349998,66.000000,65.040001,65.959999,5187400.0
2017-03-21,ABT,38.809986,44.799999,45.380001,44.599998,45.150002,5567100.0
2017-03-21,ACGL,29.671204,31.203333,31.736668,31.173332,31.726667,980100.0
...,...,...,...,...,...,...,...
2025-03-18,XYL,120.919998,120.919998,122.949997,120.639999,122.669998,1346600.0
2025-03-18,YUM,157.300003,157.300003,157.940002,156.160004,156.710007,1332200.0
2025-03-18,ZBH,112.879997,112.879997,113.940002,111.349998,111.660004,1619300.0
2025-03-18,ZBRA,282.579987,282.579987,285.540009,281.920013,282.720001,323500.0


## Calculating Technical Indicators for Each Stock
- Garman Klass Volatility
- RSI (Relative Strength Index)  
- Bollinger Bands  
- ATR (Average True Range)
- MACD

In [20]:
df['garman-klass-vol'] = ((np.log(df['high']) - np.log(df['low']))**2)/2 - ((2*np.log(2)-1) * ((np.log(df['adj close']) - np.log(df['open'])))**2)
df['rsi'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.rsi(close = x, length = 20))
df['bb_low'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.bbands(close = np.log1p(x), length = 20).iloc[:,0])
df['bb_mid'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.bbands(close = np.log1p(x), length = 20).iloc[:,1])
df['bb_high'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.bbands(close = np.log1p(x), length = 20).iloc[:,2])
def calculate_atr(stock_data):
    atr = pandas_ta.atr(high = stock_data['high'], 
                       low = stock_data['low'],
                       close = stock_data['close'],
                       length = 14)
    return atr.sub(atr.mean()).div(atr.std())
df['atr'] = df.groupby(level = 1, group_keys = False).apply(calculate_atr)
def calculate_macd(stock_data):
    macd = pandas_ta.macd(close = stock_data['adj close'], length = 20).iloc[:, 0]
    return macd.sub(macd.mean()).div(macd.std())
df['macd'] = df.groupby(level = 1, group_keys = False).apply(calculate_macd)
df['dollar-volume'] = (df['adj close'] * df['volume']) / 1e6
df

Unnamed: 0_level_0,Price,adj close,close,high,low,open,volume,garman-class-vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar-volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2017-03-21,A,49.549419,52.660000,53.730000,52.480000,53.279999,1492700.0,-0.001758,,,,,,,73.962418
2017-03-21,AAPL,32.481522,34.959999,35.700001,34.932499,35.527500,158119600.0,-0.002868,,,,,,,5135.965204
2017-03-21,ABBV,46.380817,65.349998,66.000000,65.040001,65.959999,5187400.0,-0.047800,,,,,,,240.595852
2017-03-21,ABT,38.809986,44.799999,45.380001,44.599998,45.150002,5567100.0,-0.008694,,,,,,,216.059074
2017-03-21,ACGL,29.671204,31.203333,31.736668,31.173332,31.726667,980100.0,-0.001573,,,,,,,29.080747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-18,XYL,120.919998,120.919998,122.949997,120.639999,122.669998,1346600.0,0.000100,41.466266,4.800888,4.851952,4.903017,0.870868,-0.797464,162.830870
2025-03-18,YUM,157.300003,157.300003,157.940002,156.160004,156.710007,1332200.0,0.000059,63.526886,5.000937,5.049923,5.098909,1.602085,2.404496,209.555064
2025-03-18,ZBH,112.879997,112.879997,113.940002,111.349998,111.660004,1619300.0,0.000219,60.548229,4.625923,4.677197,4.728471,0.142403,0.634891,182.786580
2025-03-18,ZBRA,282.579987,282.579987,285.540009,281.920013,282.720001,323500.0,0.000081,31.882698,5.595533,5.702298,5.809063,0.332612,-2.574897,91.414626
