<a href="https://colab.research.google.com/github/Kingadamy/210-demo-stl-list/blob/main/Meeting_2_Volatility_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Setting Up Dependencies**

In [None]:
import yfinance as yf
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

**Loading and Pre-Analysis**

In [None]:
ticker = "SPY"
# Ask for split/dividend-adjusted prices explicitly rather than relying on the
# library default, which has changed between yfinance releases. The prices
# displayed below are already adjusted, so this pins the current behaviour and
# keeps the notebook reproducible across yfinance versions.
df = yf.download(ticker, start="2022-01-01", end="2025-01-01", auto_adjust=True)

  df = yf.download(ticker, start="2022-01-01", end="2025-01-01")
[*********************100%***********************]  1 of 1 completed


In [None]:
# Display the downloaded price frame to check its columns and date range.
df

Price,Close,High,Low,Open,Volume
Ticker,SPY,SPY,SPY,SPY,SPY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2022-01-03,453.210388,453.343222,449.548364,451.872697,72668200
2022-01-04,453.058624,455.364023,451.189655,454.642990,71178700
2022-01-05,444.358948,453.466614,444.264071,452.688661,104538900
2022-01-06,443.941467,446.673770,441.560185,443.894043,86858900
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600
...,...,...,...,...,...
2024-12-24,596.076965,596.116657,590.297589,590.882491,33160100
2024-12-26,596.116638,597.246690,592.884946,594.292594,41219100
2024-12-27,589.841553,592.587511,585.628470,592.349545,64969300
2024-12-30,583.110535,586.599978,579.333632,582.783445,56578800


**Feature Engineering (5 inputs)**

In [None]:
# Daily return: fractional change of each close relative to the previous row.
# The first row has no predecessor, so its return is NaN.
close_prices = df["Close"]
df["PCT_return"] = close_prices.pct_change()
df

Price,Close,High,Low,Open,Volume,PCT_return
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2022-01-03,453.210388,453.343222,449.548364,451.872697,72668200,
2022-01-04,453.058624,455.364023,451.189655,454.642990,71178700,-0.000335
2022-01-05,444.358948,453.466614,444.264071,452.688661,104538900,-0.019202
2022-01-06,443.941467,446.673770,441.560185,443.894043,86858900,-0.000940
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954
...,...,...,...,...,...,...
2024-12-24,596.076965,596.116657,590.297589,590.882491,33160100,0.011115
2024-12-26,596.116638,597.246690,592.884946,594.292594,41219100,0.000067
2024-12-27,589.841553,592.587511,585.628470,592.349545,64969300,-0.010527
2024-12-30,583.110535,586.599978,579.333632,582.783445,56578800,-0.011412


In [None]:
# Short-horizon volatility: rolling standard deviation of the daily returns
# over a 3-row window (NaN until three returns are available).
daily_returns = df["PCT_return"]
df["3day_volatility"] = daily_returns.rolling(3).std()
df

Price,Close,High,Low,Open,Volume,PCT_return,3day_volatility
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2022-01-03,453.210388,453.343222,449.548364,451.872697,72668200,,
2022-01-04,453.058624,455.364023,451.189655,454.642990,71178700,-0.000335,
2022-01-05,444.358948,453.466614,444.264071,452.688661,104538900,-0.019202,
2022-01-06,443.941467,446.673770,441.560185,443.894043,86858900,-0.000940,0.010723
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954,0.009791
...,...,...,...,...,...,...,...
2024-12-24,596.076965,596.116657,590.297589,590.882491,33160100,0.011115,0.003250
2024-12-26,596.116638,597.246690,592.884946,594.292594,41219100,0.000067,0.005529
2024-12-27,589.841553,592.587511,585.628470,592.349545,64969300,-0.010527,0.010822
2024-12-30,583.110535,586.599978,579.333632,582.783445,56578800,-0.011412,0.006387


In [None]:
# Normalised volume: 3-row average volume divided by the 20-row average.
# Values above 1 mean the last three sessions traded more than the 20-row mean.
volume = df["Volume"]
recent_avg_volume = volume.rolling(3).mean()
baseline_avg_volume = volume.rolling(20).mean()
df["volume_norm"] = recent_avg_volume / baseline_avg_volume
df

Price,Close,High,Low,Open,Volume,PCT_return,3day_volatility,volume_norm
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2022-01-03,453.210388,453.343222,449.548364,451.872697,72668200,,,
2022-01-04,453.058624,455.364023,451.189655,454.642990,71178700,-0.000335,,
2022-01-05,444.358948,453.466614,444.264071,452.688661,104538900,-0.019202,,
2022-01-06,443.941467,446.673770,441.560185,443.894043,86858900,-0.000940,0.010723,
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954,0.009791,
...,...,...,...,...,...,...,...,...
2024-12-24,596.076965,596.116657,590.297589,590.882491,33160100,0.011115,0.003250,1.520194
2024-12-26,596.116638,597.246690,592.884946,594.292594,41219100,0.000067,0.005529,0.931231
2024-12-27,589.841553,592.587511,585.628470,592.349545,64969300,-0.010527,0.010822,0.951773
2024-12-30,583.110535,586.599978,579.333632,582.783445,56578800,-0.011412,0.006387,1.082448


In [None]:
# Simple moving averages of the close: a 5-row "fast" one and a 20-row "slow"
# one. Both are inputs to the slope features computed in the next cell.
for sma_name, sma_window in (("fast_sma", 5), ("slow_sma", 20)):
    df[sma_name] = df["Close"].rolling(sma_window).mean()

In [None]:
SLOPE_LAG = 3  # number of rows between the two SMA readings being compared


def relative_slope(sma, lag=SLOPE_LAG):
    """Return the average per-row relative change of `sma` over `lag` rows.

    Computed as (sma[t] - sma[t - lag]) / (lag * sma[t - lag]). Rows where the
    lagged value is missing come out as NaN.

    The difference must be parenthesised before dividing: without the
    parentheses, division binds tighter than subtraction and the expression
    collapses to `sma - 1/lag`, i.e. the price level shifted by a constant.
    """
    lagged = sma.shift(lag)
    return (sma - lagged) / (lag * lagged)


df["Slow_Slope"] = relative_slope(df["slow_sma"])
df["Fast_Slope"] = relative_slope(df["fast_sma"])
df

Price,Close,High,Low,Open,Volume,PCT_return,3day_volatility,volume_norm,fast_sma,slow_sma,Slow_Slope,Fast_Slope
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2022-01-03,453.210388,453.343222,449.548364,451.872697,72668200,,,,,,,
2022-01-04,453.058624,455.364023,451.189655,454.642990,71178700,-0.000335,,,,,,
2022-01-05,444.358948,453.466614,444.264071,452.688661,104538900,-0.019202,,,,,,
2022-01-06,443.941467,446.673770,441.560185,443.894043,86858900,-0.000940,0.010723,,,,,
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954,0.009791,,447.351147,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,596.076965,596.116657,590.297589,590.882491,33160100,0.011115,0.003250,1.520194,585.982886,594.363748,594.030415,585.649552
2024-12-26,596.116638,597.246690,592.884946,594.292594,41219100,0.000067,0.005529,0.931231,589.358630,594.497815,594.164482,589.025297
2024-12-27,589.841553,592.587511,585.628470,592.349545,64969300,-0.010527,0.010822,0.951773,591.514941,594.408035,594.074702,591.181608
2024-12-30,583.110535,586.599978,579.333632,582.783445,56578800,-0.011412,0.006387,1.082448,590.934021,593.797937,593.464604,590.600688


In [None]:
# Day-of-week feature taken from the date index; in the displayed output
# Monday 2022-01-03 maps to 0 and Friday 2022-01-07 maps to 4.
trading_dates = df.index
df['DOW-T'] = trading_dates.dayofweek
df

Price,Close,High,Low,Open,Volume,PCT_return,3day_volatility,volume_norm,fast_sma,slow_sma,Slow_Slope,Fast_Slope,DOW-T
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
2022-01-03,453.210388,453.343222,449.548364,451.872697,72668200,,,,,,,,0
2022-01-04,453.058624,455.364023,451.189655,454.642990,71178700,-0.000335,,,,,,,1
2022-01-05,444.358948,453.466614,444.264071,452.688661,104538900,-0.019202,,,,,,,2
2022-01-06,443.941467,446.673770,441.560185,443.894043,86858900,-0.000940,0.010723,,,,,,3
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954,0.009791,,447.351147,,,,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,596.076965,596.116657,590.297589,590.882491,33160100,0.011115,0.003250,1.520194,585.982886,594.363748,594.030415,585.649552,1
2024-12-26,596.116638,597.246690,592.884946,594.292594,41219100,0.000067,0.005529,0.931231,589.358630,594.497815,594.164482,589.025297,3
2024-12-27,589.841553,592.587511,585.628470,592.349545,64969300,-0.010527,0.010822,0.951773,591.514941,594.408035,594.074702,591.181608,4
2024-12-30,583.110535,586.599978,579.333632,582.783445,56578800,-0.011412,0.006387,1.082448,590.934021,593.797937,593.464604,590.600688,0


In [None]:
# Target: tercile class (0 = low, 1 = mid, 2 = high) of the next row's
# 10-row rolling volatility. Rows with no future value stay NaN.
#
# NOTE(review): future_vol at row t is the 10-row window ending at t+1, so it
# shares 9 of its 10 returns with data already known at t, and the tercile
# cut-offs are taken over the whole sample. Confirm both are intended before
# judging model accuracy on a held-out split.
real_vol = df["PCT_return"].rolling(10).std()
future_vol = real_vol.shift(-1)
q = future_vol.quantile([1/3, 2/3]).values
low_cut, high_cut = q[0], q[1]

class_conditions = [
    future_vol <= low_cut,
    (future_vol > low_cut) & (future_vol <= high_cut),
    future_vol > high_cut,
]
class_labels = [0, 1, 2]
df["target_cls"] = np.select(class_conditions, class_labels, default=np.nan).astype('float')

In [None]:
# Display the frame again to inspect the newly added target_cls column.
df

Price,Close,High,Low,Open,Volume,PCT_return,3day_volatility,volume_norm,fast_sma,slow_sma,Slow_Slope,Fast_Slope,DOW-T,target_cls
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
2022-01-03,453.210388,453.343222,449.548364,451.872697,72668200,,,,,,,,0,
2022-01-04,453.058624,455.364023,451.189655,454.642990,71178700,-0.000335,,,,,,,1,
2022-01-05,444.358948,453.466614,444.264071,452.688661,104538900,-0.019202,,,,,,,2,
2022-01-06,443.941467,446.673770,441.560185,443.894043,86858900,-0.000940,0.010723,,,,,,3,
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954,0.009791,,447.351147,,,,4,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,596.076965,596.116657,590.297589,590.882491,33160100,0.011115,0.003250,1.520194,585.982886,594.363748,594.030415,585.649552,1,2.0
2024-12-26,596.116638,597.246690,592.884946,594.292594,41219100,0.000067,0.005529,0.931231,589.358630,594.497815,594.164482,589.025297,3,2.0
2024-12-27,589.841553,592.587511,585.628470,592.349545,64969300,-0.010527,0.010822,0.951773,591.514941,594.408035,594.074702,591.181608,4,2.0
2024-12-30,583.110535,586.599978,579.333632,582.783445,56578800,-0.011412,0.006387,1.082448,590.934021,593.797937,593.464604,590.600688,0,2.0


In [None]:
# Model inputs (the five engineered features) plus the class label.
features = ['3day_volatility', 'volume_norm', 'Slow_Slope', 'Fast_Slope', 'DOW-T']
model_columns = [*features, 'target_cls']

# Keep only rows where every feature and the target are present; the rolling
# windows and shifts leave NaNs at the start and end of the sample.
training_df = df[model_columns].dropna(axis=0, how="any")
training_df

Price,3day_volatility,volume_norm,Slow_Slope,Fast_Slope,DOW-T,target_cls
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2022-02-03,0.018385,0.920992,428.970413,426.352922,3,2.0
2022-02-04,0.017908,0.897985,428.057749,427.633683,4,2.0
2022-02-07,0.014550,0.814718,427.164540,427.130870,0,2.0
2022-02-08,0.005857,0.731445,426.473397,426.749491,1,2.0
2022-02-09,0.009040,0.660152,425.894209,426.785545,2,2.0
...,...,...,...,...,...,...
2024-12-23,0.006160,1.872330,593.744209,585.840470,0,2.0
2024-12-24,0.003250,1.520194,594.030415,585.649552,1,2.0
2024-12-26,0.005529,0.931231,594.164482,589.025297,3,2.0
2024-12-27,0.010822,0.951773,594.074702,591.181608,4,2.0


**Model #1 Training (Logistic Regression)**

**Model #1 Analysis (Logistic Regression)**

**Model #2 Training (XGBoost)**

**Model #2 Analysis (XGBoost)**