# Stock Market Analysis  

In [1]:
!pip install pandas numpy matplotlib statsmodels pandas-datareader datetime yfinance scikit-learn PyPortfolioOpt

Defaulting to user installation because normal site-packages is not writeable
Collecting statsmodels
  Downloading statsmodels-0.14.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m13.7 MB/s[0m eta [36m0:00:00[0mm eta [36m0:00:01[0m[36m0:00:01[0m
[?25hCollecting pandas-datareader
  Downloading pandas_datareader-0.10.0-py3-none-any.whl (109 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m109.5/109.5 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting datetime
  Downloading DateTime-5.5-py3-none-any.whl (52 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 kB[0m [31m19.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting yfinance
  Downloading yfinance-0.2.54-py2.py3-none-any.whl (108 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m108.7/108.7 kB[0m 

In [2]:
!pip install pandas-ta


Defaulting to user installation because normal site-packages is not writeable
Collecting pandas-ta
  Downloading pandas_ta-0.3.14b.tar.gz (115 kB)
[2K     [38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m115.1/115.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m MB/s[0m eta [36m0:00:01[0m
[?25h  Preparing metadata (setup.py) ... [?25ldone
Installing collected packages: pandas-ta
[33m  DEPRECATION: pandas-ta is being installed using the legacy 'setup.py install' method, because it does not have a 'pyproject.toml' and the 'wheel' package is not installed. pip 23.1 will enforce this behaviour change. A possible replacement is to enable the '--use-pep517' option. Discussion can be found at https://github.com/pypa/pip/issues/8559[0m[33m
[0m  Running setup.py install for pandas-ta ... [?25ldone
[?25hSuccessfully installed pandas-ta-0.3.14b0


## Data Collection & Fetching S&P 500 Data  


In [1]:
from statsmodels.regression.rolling import RollingOLS
import pandas_datareader.data as web
import matplotlib.pyplot as plt
import statsmodels.api as sm
import pandas as pd
import numpy as np
import datetime as dt
import yfinance as yf
import pandas_ta
import warnings

warnings.filterwarnings('ignore')

snp500 = pd.read_html('https://en.wikipedia.org/wiki/List_of_S%26P_500_companies')[0]
snp500['Symbol'] = snp500['Symbol'].str.replace('.', '-')
symbols = snp500['Symbol'].unique().tolist()
endDate = '2025-03-19'
startDate = pd.to_datetime(endDate) - pd.DateOffset(365*8)
df = yf.download(tickers = symbols, start=startDate, end=endDate, auto_adjust=False).stack()
df.index.names = ['date', 'ticker']
df.columns = df.columns.str.lower()
df

[*********************100%***********************]  503 of 503 completed


Unnamed: 0_level_0,Price,adj close,close,high,low,open,volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-03-21,A,49.549419,52.660000,53.730000,52.480000,53.279999,1492700.0
2017-03-21,AAPL,32.481510,34.959999,35.700001,34.932499,35.527500,158119600.0
2017-03-21,ABBV,46.380829,65.349998,66.000000,65.040001,65.959999,5187400.0
2017-03-21,ABT,38.809990,44.799999,45.380001,44.599998,45.150002,5567100.0
2017-03-21,ACGL,29.671204,31.203333,31.736668,31.173332,31.726667,980100.0
...,...,...,...,...,...,...,...
2025-03-18,XYL,120.919998,120.919998,122.949997,120.639999,122.669998,1346600.0
2025-03-18,YUM,157.300003,157.300003,157.940002,156.160004,156.710007,1332200.0
2025-03-18,ZBH,112.879997,112.879997,113.940002,111.349998,111.660004,1619300.0
2025-03-18,ZBRA,282.579987,282.579987,285.540009,281.920013,282.720001,323500.0


## Calculating Technical Indicators for Each Stock
- Garman Klass Volatility
- RSI (Relative Strength Index)  
- Bollinger Bands  
- ATR (Average True Range)
- MACD

In [2]:
df['garman-klass-vol'] = ((np.log(df['high']) - np.log(df['low']))**2)/2 - ((2*np.log(2)-1) * ((np.log(df['adj close']) - np.log(df['open'])))**2)
df['rsi'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.rsi(close = x, length = 20))
df['bb_low'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.bbands(close = np.log1p(x), length = 20).iloc[:,0])
df['bb_mid'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.bbands(close = np.log1p(x), length = 20).iloc[:,1])
df['bb_high'] = df.groupby(level = 1)['adj close'].transform(lambda x: pandas_ta.bbands(close = np.log1p(x), length = 20).iloc[:,2])
def calculate_atr(stock_data):
    atr = pandas_ta.atr(high = stock_data['high'], 
                       low = stock_data['low'],
                       close = stock_data['close'],
                       length = 14)
    return atr.sub(atr.mean()).div(atr.std())
df['atr'] = df.groupby(level = 1, group_keys = False).apply(calculate_atr)
def calculate_macd(stock_data):
    macd = pandas_ta.macd(close = stock_data['adj close'], length = 20).iloc[:, 0]
    return macd.sub(macd.mean()).div(macd.std())
df['macd'] = df.groupby(level = 1, group_keys = False).apply(calculate_macd)
df['dollar-volume'] = (df['adj close'] * df['volume']) / 1e6
df

Unnamed: 0_level_0,Price,adj close,close,high,low,open,volume,garman-klass-vol,rsi,bb_low,bb_mid,bb_high,atr,macd,dollar-volume
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2017-03-21,A,49.549419,52.660000,53.730000,52.480000,53.279999,1492700.0,-0.001758,,,,,,,73.962418
2017-03-21,AAPL,32.481510,34.959999,35.700001,34.932499,35.527500,158119600.0,-0.002868,,,,,,,5135.963394
2017-03-21,ABBV,46.380829,65.349998,66.000000,65.040001,65.959999,5187400.0,-0.047800,,,,,,,240.595912
2017-03-21,ABT,38.809990,44.799999,45.380001,44.599998,45.150002,5567100.0,-0.008694,,,,,,,216.059095
2017-03-21,ACGL,29.671204,31.203333,31.736668,31.173332,31.726667,980100.0,-0.001573,,,,,,,29.080747
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-18,XYL,120.919998,120.919998,122.949997,120.639999,122.669998,1346600.0,0.000100,41.466266,4.800888,4.851952,4.903017,0.870868,-0.797464,162.830870
2025-03-18,YUM,157.300003,157.300003,157.940002,156.160004,156.710007,1332200.0,0.000059,63.526885,5.000937,5.049923,5.098909,1.602085,2.404496,209.555064
2025-03-18,ZBH,112.879997,112.879997,113.940002,111.349998,111.660004,1619300.0,0.000219,60.548229,4.625923,4.677197,4.728471,0.142403,0.634891,182.786580
2025-03-18,ZBRA,282.579987,282.579987,285.540009,281.920013,282.720001,323500.0,0.000081,31.882698,5.595533,5.702298,5.809063,0.332612,-2.574897,91.414626


## Aggregating Technical Indicators for efficiency & Finding the 150 Most Liquid Stocks for each month.

In [3]:
last_cols = [c for c in df.columns.unique(0) if c not in ['dollar-volume', 'volume', 'high', 'low', 'close', 'open']]


data = pd.concat([df.unstack('ticker')['dollar-volume'].resample('M').mean().stack('ticker').to_frame('dollar-volume'),
    df.unstack()[last_cols].resample('M').last().stack('ticker')], axis = 1).dropna()
data

Unnamed: 0_level_0,Unnamed: 1_level_0,dollar-volume,adj close,garman-klass-vol,rsi,bb_low,bb_mid,bb_high,atr,macd
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2017-04-30,A,86.102472,51.926792,-0.001496,67.751886,3.905932,3.939235,3.972537,-1.668130,0.173096
2017-04-30,AAPL,2608.757504,33.366482,-0.002240,58.120002,3.516103,3.532141,3.548179,-1.511922,-0.269622
2017-04-30,ABBV,253.361370,47.265160,-0.043208,63.932919,3.835177,3.854937,3.874696,-1.715298,-0.225934
2017-04-30,ABT,270.964079,38.036320,-0.007353,45.597162,3.648956,3.664271,3.679586,-1.682351,-0.349518
2017-04-30,ACGL,27.139338,30.736210,-0.001408,62.635100,3.414365,3.438866,3.463367,-1.203959,0.095472
...,...,...,...,...,...,...,...,...,...,...
2025-03-31,XYL,188.918267,120.919998,0.000100,41.466266,4.800888,4.851952,4.903017,0.870868,-0.797464
2025-03-31,YUM,340.510976,157.300003,0.000059,63.526885,5.000937,5.049923,5.098909,1.602085,2.404496
2025-03-31,ZBH,220.313828,112.879997,0.000219,60.548229,4.625923,4.677197,4.728471,0.142403,0.634891
2025-03-31,ZBRA,178.565405,282.579987,0.000081,31.882698,5.595533,5.702298,5.809063,0.332612,-2.574897


In [44]:
data['dollar-volume'] = (data.loc[:, 'dollar-volume'].unstack('ticker').rolling(5 * 12, min_periods = 12).mean().stack())
data['dollar-volume-rank'] = (data.groupby('date')['dollar-volume'].rank(ascending = False))
data = data[data['dollar-volume-rank'] < 150].drop(['dollar-volume', 'dollar-volume-rank'], axis = 1)
data

Unnamed: 0_level_0,Unnamed: 1_level_0,adj close,garman-klass-vol,rsi,bb_low,bb_mid,bb_high,atr,macd
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2018-03-31,AAPL,39.587822,-0.000890,44.825053,3.687069,3.741262,3.795456,-1.056620,-0.437096
2018-03-31,ABBV,69.426056,-0.036543,33.321298,4.214538,4.394779,4.575019,2.000481,-2.190387
2018-03-31,ABT,53.024132,-0.005071,47.670015,3.970474,4.015669,4.060864,-0.838610,-0.274448
2018-03-31,ACN,138.353165,-0.001148,46.272111,4.894494,4.961246,5.027997,-0.448978,-0.681992
2018-03-31,ADBE,216.080002,0.000878,53.315881,5.341287,5.392512,5.443737,-0.637509,0.237975
...,...,...,...,...,...,...,...,...,...
2025-03-31,VZ,44.230000,0.000162,58.201272,3.749895,3.796761,3.843628,1.213496,1.601997
2025-03-31,WDAY,251.910004,0.000101,48.047776,5.473597,5.540050,5.606503,1.256544,-1.009656
2025-03-31,WFC,71.110001,0.000117,43.220412,4.203922,4.311719,4.419516,3.184888,-2.168706
2025-03-31,WMT,85.355598,0.000092,36.558298,4.420787,4.537403,4.654018,4.996418,-5.248916


## Calculating Monthly Returns for the Stocks at different Time Frames

In [4]:
def calculate_monthly_returns(df):
    outlier_cutoff = 0.05  # Corrected spelling

    lags = [1, 2, 3, 6, 9, 12]

    for lag in lags:
        df[f'return_{lag}m'] = (
            df['adj close']
            .pct_change(lag)
            .pipe(lambda x: x.clip(lower=x.quantile(outlier_cutoff), 
                                   upper=x.quantile(1 - outlier_cutoff)))
            .add(1)
            .pow(1 / lag)
            .sub(1)
        )
    
    return df  # Ensure the function returns the modified DataFrame

data = data.groupby(level=1, group_keys=False).apply(calculate_monthly_returns).dropna()
data

Unnamed: 0_level_0,Unnamed: 1_level_0,dollar-volume,adj close,garman-klass-vol,rsi,bb_low,bb_mid,bb_high,atr,macd,return_1m,return_2m,return_3m,return_6m,return_9m,return_12m
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2018-04-30,A,164.751899,62.553654,-0.001340,44.520041,4.125545,4.165293,4.205042,-0.988901,-0.323773,-0.015146,-0.019904,-0.035487,-0.004954,0.011327,0.015637
2018-04-30,AAPL,5060.827080,38.993225,-0.000036,44.093180,3.659055,3.715842,3.772629,-1.111677,-0.523212,-0.015020,-0.036775,-0.002988,-0.002490,0.013081,0.013071
2018-04-30,ABBV,457.410090,71.553276,-0.040410,47.693770,4.193913,4.244860,4.295808,0.632454,-0.810684,0.030640,-0.066680,-0.045625,0.014256,0.039338,0.034580
2018-04-30,ABT,339.340564,51.687737,-0.007719,45.310069,3.954631,3.976980,3.999329,-0.826533,-0.429200,-0.025203,-0.016043,-0.020529,0.013257,0.020388,0.025886
2018-04-30,ACGL,45.069252,25.398499,-0.001527,39.144052,3.241317,3.293302,3.345287,-1.001287,-0.634783,-0.063793,-0.046077,-0.039585,-0.035667,-0.021296,-0.015770
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-31,XYL,188.918267,120.919998,0.000100,41.466266,4.800888,4.851952,4.903017,0.870868,-0.797464,-0.076171,-0.011127,0.014931,-0.017262,-0.011737,-0.004605
2025-03-31,YUM,340.510976,157.300003,0.000059,63.526885,5.000937,5.049923,5.098909,1.602085,2.404496,0.005947,0.079612,0.054191,0.021585,0.020927,0.012210
2025-03-31,ZBH,220.313828,112.879997,0.000219,60.548229,4.625923,4.677197,4.728471,0.142403,0.634891,0.082055,0.015409,0.022374,0.007848,0.004875,-0.012392
2025-03-31,ZBRA,178.565405,282.579987,0.000081,31.882698,5.595533,5.702298,5.809063,0.332612,-2.574897,-0.103063,-0.123582,-0.098909,-0.044067,-0.009857,-0.005370


## Calculating FamaFrench Factor

In [11]:
factorData = web.DataReader('F-F_Research_Data_5_Factors_2x3', 
              'famafrench',
              start='2010')[0].drop('RF', axis = 1)
factorData.index = pd.to_datetime(factorData.index.to_timestamp())
factorData = factorData.resample('M').last().div(100)
factorData.index.name = 'date'
factorData = factorData.join(data['return_1m']).sort_index()
factorData

Unnamed: 0_level_0,Unnamed: 1_level_0,Mkt-RF,SMB,HML,RMW,CMA,return_1m
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-04-30,A,0.0029,0.0093,0.0054,-0.0244,0.0128,-0.015146
2018-04-30,AAPL,0.0029,0.0093,0.0054,-0.0244,0.0128,-0.015020
2018-04-30,ABBV,0.0029,0.0093,0.0054,-0.0244,0.0128,0.030640
2018-04-30,ABT,0.0029,0.0093,0.0054,-0.0244,0.0128,-0.025203
2018-04-30,ACGL,0.0029,0.0093,0.0054,-0.0244,0.0128,-0.063793
...,...,...,...,...,...,...,...
2024-12-31,XYL,-0.0317,-0.0387,-0.0295,0.0182,-0.0110,-0.084655
2024-12-31,YUM,-0.0317,-0.0387,-0.0295,0.0182,-0.0110,-0.029724
2024-12-31,ZBH,-0.0317,-0.0387,-0.0295,0.0182,-0.0110,-0.055595
2024-12-31,ZBRA,-0.0317,-0.0387,-0.0295,0.0182,-0.0110,-0.051057


In [12]:
sizes = factorData.groupby(level = 1).size()
valStocks = sizes[sizes > 10]
valStocks
factorData = factorData[factorData.index.get_level_values('ticker').isin(valStocks.index)]
factorData

Unnamed: 0_level_0,Unnamed: 1_level_0,Mkt-RF,SMB,HML,RMW,CMA,return_1m
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2018-04-30,A,0.0029,0.0093,0.0054,-0.0244,0.0128,-0.015146
2018-04-30,AAPL,0.0029,0.0093,0.0054,-0.0244,0.0128,-0.015020
2018-04-30,ABBV,0.0029,0.0093,0.0054,-0.0244,0.0128,0.030640
2018-04-30,ABT,0.0029,0.0093,0.0054,-0.0244,0.0128,-0.025203
2018-04-30,ACGL,0.0029,0.0093,0.0054,-0.0244,0.0128,-0.063793
...,...,...,...,...,...,...,...
2024-12-31,XYL,-0.0317,-0.0387,-0.0295,0.0182,-0.0110,-0.084655
2024-12-31,YUM,-0.0317,-0.0387,-0.0295,0.0182,-0.0110,-0.029724
2024-12-31,ZBH,-0.0317,-0.0387,-0.0295,0.0182,-0.0110,-0.055595
2024-12-31,ZBRA,-0.0317,-0.0387,-0.0295,0.0182,-0.0110,-0.051057


In [19]:
betas = (factorData.groupby(level=1, group_keys=False).apply(
    lambda x: RollingOLS(
        endog=x['return_1m'],
        exog=sm.add_constant(x.drop('return_1m', axis=1)),
        window=min(24, x.shape[0]),
        min_nobs=len(x.columns) + 1
    ).fit(params_only=True).params.drop('const', axis=1)
))
betas

Unnamed: 0_level_0,Unnamed: 1_level_0,Mkt-RF,SMB,HML,RMW,CMA
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-04-30,A,,,,,
2018-04-30,AAPL,,,,,
2018-04-30,ABBV,,,,,
2018-04-30,ABT,,,,,
2018-04-30,ACGL,,,,,
...,...,...,...,...,...,...
2024-12-31,XYL,1.456732,-0.257336,0.367596,0.513712,0.012088
2024-12-31,YUM,0.759704,0.465004,-0.883379,0.358893,1.779204
2024-12-31,ZBH,1.351253,-0.043120,-0.161246,0.285042,1.502347
2024-12-31,ZBRA,1.333694,1.418692,-1.078997,-0.493871,1.666038


In [20]:
data = data.join(betas.groupby('ticker').shift())
data

Unnamed: 0_level_0,Unnamed: 1_level_0,dollar-volume,adj close,garman-klass-vol,rsi,bb_low,bb_mid,bb_high,atr,macd,return_1m,return_2m,return_3m,return_6m,return_9m,return_12m,Mkt-RF,SMB,HML,RMW,CMA
date,ticker,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2018-04-30,A,164.751899,62.553654,-0.001340,44.520041,4.125545,4.165293,4.205042,-0.988901,-0.323773,-0.015146,-0.019904,-0.035487,-0.004954,0.011327,0.015637,,,,,
2018-04-30,AAPL,5060.827080,38.993225,-0.000036,44.093180,3.659055,3.715842,3.772629,-1.111677,-0.523212,-0.015020,-0.036775,-0.002988,-0.002490,0.013081,0.013071,,,,,
2018-04-30,ABBV,457.410090,71.553276,-0.040410,47.693770,4.193913,4.244860,4.295808,0.632454,-0.810684,0.030640,-0.066680,-0.045625,0.014256,0.039338,0.034580,,,,,
2018-04-30,ABT,339.340564,51.687737,-0.007719,45.310069,3.954631,3.976980,3.999329,-0.826533,-0.429200,-0.025203,-0.016043,-0.020529,0.013257,0.020388,0.025886,,,,,
2018-04-30,ACGL,45.069252,25.398499,-0.001527,39.144052,3.241317,3.293302,3.345287,-1.001287,-0.634783,-0.063793,-0.046077,-0.039585,-0.035667,-0.021296,-0.015770,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-03-31,XYL,188.918267,120.919998,0.000100,41.466266,4.800888,4.851952,4.903017,0.870868,-0.797464,-0.076171,-0.011127,0.014931,-0.017262,-0.011737,-0.004605,,,,,
2025-03-31,YUM,340.510976,157.300003,0.000059,63.526885,5.000937,5.049923,5.098909,1.602085,2.404496,0.005947,0.079612,0.054191,0.021585,0.020927,0.012210,,,,,
2025-03-31,ZBH,220.313828,112.879997,0.000219,60.548229,4.625923,4.677197,4.728471,0.142403,0.634891,0.082055,0.015409,0.022374,0.007848,0.004875,-0.012392,,,,,
2025-03-31,ZBRA,178.565405,282.579987,0.000081,31.882698,5.595533,5.702298,5.809063,0.332612,-2.574897,-0.103063,-0.123582,-0.098909,-0.044067,-0.009857,-0.005370,,,,,


In [25]:
# factors = ['Mkt-RF', 'SMB', 'HML', 'RMW', 'CMA']
# data.loc[:, factors] = data.groupby('ticker', group_keys = False)[factors].apply(lambda x: x.fillna(x.mean()))
data = data.drop('adj close', axis = 1)
data = data.dropna()
data.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 41213 entries, (Timestamp('2018-04-30 00:00:00'), 'A') to (Timestamp('2025-03-31 00:00:00'), 'ZTS')
Data columns (total 19 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   dollar-volume     41213 non-null  float64
 1   garman-klass-vol  41213 non-null  float64
 2   rsi               41213 non-null  float64
 3   bb_low            41213 non-null  float64
 4   bb_mid            41213 non-null  float64
 5   bb_high           41213 non-null  float64
 6   atr               41213 non-null  float64
 7   macd              41213 non-null  float64
 8   return_1m         41213 non-null  float64
 9   return_2m         41213 non-null  float64
 10  return_3m         41213 non-null  float64
 11  return_6m         41213 non-null  float64
 12  return_9m         41213 non-null  float64
 13  return_12m        41213 non-null  float64
 14  Mkt-RF            41213 non-null  float64
 15  SMB          