<a href="https://colab.research.google.com/github/Kingadamy/210-demo-stl-list/blob/main/Meeting_2_Volatility_Classification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Setting Up Dependencies**

In [None]:
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from xgboost import XGBClassifier

**Loading and Pre-Analysis**

In [None]:
# Download daily price bars for the ticker over the study window.
ticker = "SPY"
# auto_adjust is pinned explicitly because its default differs between yfinance
# releases; the recorded output (no separate "Adj Close" column) corresponds to
# adjusted prices, so True keeps the data the rest of the notebook was built on.
df = yf.download(ticker, start="2022-01-01", end="2025-01-01", auto_adjust=True)
# yfinance may hand back an empty frame when a download fails; stop here with a
# clear message instead of failing obscurely in a later feature cell.
if df.empty:
    raise RuntimeError(f"No price data returned for {ticker}")

  df = yf.download(ticker, start="2022-01-01", end="2025-01-01")
[*********************100%***********************]  1 of 1 completed


In [None]:
# Preview the downloaded frame; the notebook renders a truncated head/tail view.
df

Price,Close,High,Low,Open,Volume
Ticker,SPY,SPY,SPY,SPY,SPY
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2022-01-03,453.210419,453.343253,449.548394,451.872728,72668200
2022-01-04,453.058594,455.363992,451.189625,454.642960,71178700
2022-01-05,444.358917,453.466583,444.264040,452.688630,104538900
2022-01-06,443.941437,446.673739,441.560154,443.894013,86858900
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600
...,...,...,...,...,...
2024-12-24,596.076904,596.116596,590.297529,590.882430,33160100
2024-12-26,596.116699,597.246751,592.885006,594.292655,41219100
2024-12-27,589.841614,592.587572,585.628530,592.349606,64969300
2024-12-30,583.110596,586.600040,579.333692,582.783506,56578800


**Feature Engineering (5 inputs)**

In [None]:
# One-row percentage change of the close; the first row has no predecessor and stays NaN.
df["PCT_return"] = df["Close"].pct_change(periods=1)
df

Price,Close,High,Low,Open,Volume,PCT_return
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2022-01-03,453.210419,453.343253,449.548394,451.872728,72668200,
2022-01-04,453.058594,455.363992,451.189625,454.642960,71178700,-0.000335
2022-01-05,444.358917,453.466583,444.264040,452.688630,104538900,-0.019202
2022-01-06,443.941437,446.673739,441.560154,443.894013,86858900,-0.000940
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954
...,...,...,...,...,...,...
2024-12-24,596.076904,596.116596,590.297529,590.882430,33160100,0.011115
2024-12-26,596.116699,597.246751,592.885006,594.292655,41219100,0.000067
2024-12-27,589.841614,592.587572,585.628530,592.349606,64969300,-0.010527
2024-12-30,583.110596,586.600040,579.333692,582.783506,56578800,-0.011412


In [None]:
# Short-horizon volatility: sample standard deviation of the three most recent returns.
# The first rows stay NaN until three returns are available.
df["3day_volatility"] = df["PCT_return"].rolling(3).std(ddof=1)
df

Price,Close,High,Low,Open,Volume,PCT_return,3day_volatility
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2022-01-03,453.210419,453.343253,449.548394,451.872728,72668200,,
2022-01-04,453.058594,455.363992,451.189625,454.642960,71178700,-0.000335,
2022-01-05,444.358917,453.466583,444.264040,452.688630,104538900,-0.019202,
2022-01-06,443.941437,446.673739,441.560154,443.894013,86858900,-0.000940,0.010723
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954,0.009791
...,...,...,...,...,...,...,...
2024-12-24,596.076904,596.116596,590.297529,590.882430,33160100,0.011115,0.003250
2024-12-26,596.116699,597.246751,592.885006,594.292655,41219100,0.000067,0.005529
2024-12-27,589.841614,592.587572,585.628530,592.349606,64969300,-0.010527,0.010822
2024-12-30,583.110596,586.600040,579.333692,582.783506,56578800,-0.011412,0.006387


In [None]:
# Relative volume: the 3-row average volume expressed as a multiple of the 20-row average.
# Values above 1 mean recent volume is running above its 20-row mean.
df["volume_norm"] = df["Volume"].rolling(window=3).mean().div(df["Volume"].rolling(window=20).mean())
df

Price,Close,High,Low,Open,Volume,PCT_return,3day_volatility,volume_norm
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2022-01-03,453.210419,453.343253,449.548394,451.872728,72668200,,,
2022-01-04,453.058594,455.363992,451.189625,454.642960,71178700,-0.000335,,
2022-01-05,444.358917,453.466583,444.264040,452.688630,104538900,-0.019202,,
2022-01-06,443.941437,446.673739,441.560154,443.894013,86858900,-0.000940,0.010723,
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954,0.009791,
...,...,...,...,...,...,...,...,...
2024-12-24,596.076904,596.116596,590.297529,590.882430,33160100,0.011115,0.003250,1.520194
2024-12-26,596.116699,597.246751,592.885006,594.292655,41219100,0.000067,0.005529,0.931231
2024-12-27,589.841614,592.587572,585.628530,592.349606,64969300,-0.010527,0.010822,0.951773
2024-12-30,583.110596,586.600040,579.333692,582.783506,56578800,-0.011412,0.006387,1.082448


In [None]:
# Simple moving averages of the close over a short (5-row) and a long (20-row) window.
# These are intermediate columns consumed by the slope features computed next.
df["fast_sma"] = df["Close"].rolling(window=5).mean()
df["slow_sma"] = df["Close"].rolling(window=20).mean()

In [None]:
# Slope of each moving average: its relative change over the last 3 rows, averaged
# per row, i.e. (sma_t - sma_{t-3}) / (3 * sma_{t-3}).
# The numerator must be parenthesised: without it, division binds tighter than
# subtraction and the expression collapses to `sma - 1/3`, which is just the price
# level rather than a slope.
df["Slow_Slope"] = (df["slow_sma"] - df["slow_sma"].shift(3)) / (3 * df["slow_sma"].shift(3))
df["Fast_Slope"] = (df["fast_sma"] - df["fast_sma"].shift(3)) / (3 * df["fast_sma"].shift(3))
df

Price,Close,High,Low,Open,Volume,PCT_return,3day_volatility,volume_norm,fast_sma,slow_sma,Slow_Slope,Fast_Slope
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2
2022-01-03,453.210419,453.343253,449.548394,451.872728,72668200,,,,,,,
2022-01-04,453.058594,455.363992,451.189625,454.642960,71178700,-0.000335,,,,,,
2022-01-05,444.358917,453.466583,444.264040,452.688630,104538900,-0.019202,,,,,,
2022-01-06,443.941437,446.673739,441.560154,443.894013,86858900,-0.000940,0.010723,,,,,
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954,0.009791,,447.351135,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,596.076904,596.116596,590.297529,590.882430,33160100,0.011115,0.003250,1.520194,585.982861,594.363736,594.030403,585.649528
2024-12-26,596.116699,597.246751,592.885006,594.292655,41219100,0.000067,0.005529,0.931231,589.358618,594.497806,594.164472,589.025285
2024-12-27,589.841614,592.587572,585.628530,592.349606,64969300,-0.010527,0.010822,0.951773,591.514941,594.408032,594.074699,591.181608
2024-12-30,583.110596,586.600040,579.333692,582.783506,56578800,-0.011412,0.006387,1.082448,590.934033,593.797940,593.464607,590.600700


In [None]:
# Day of the week taken from the date index (Monday = 0); the data contains values 0-4.
df['DOW-T'] = df.index.weekday
df

Price,Close,High,Low,Open,Volume,PCT_return,3day_volatility,volume_norm,fast_sma,slow_sma,Slow_Slope,Fast_Slope,DOW-T
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2
2022-01-03,453.210419,453.343253,449.548394,451.872728,72668200,,,,,,,,0
2022-01-04,453.058594,455.363992,451.189625,454.642960,71178700,-0.000335,,,,,,,1
2022-01-05,444.358917,453.466583,444.264040,452.688630,104538900,-0.019202,,,,,,,2
2022-01-06,443.941437,446.673739,441.560154,443.894013,86858900,-0.000940,0.010723,,,,,,3
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954,0.009791,,447.351135,,,,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,596.076904,596.116596,590.297529,590.882430,33160100,0.011115,0.003250,1.520194,585.982861,594.363736,594.030403,585.649528,1
2024-12-26,596.116699,597.246751,592.885006,594.292655,41219100,0.000067,0.005529,0.931231,589.358618,594.497806,594.164472,589.025285,3
2024-12-27,589.841614,592.587572,585.628530,592.349606,64969300,-0.010527,0.010822,0.951773,591.514941,594.408032,594.074699,591.181608,4
2024-12-30,583.110596,586.600040,579.333692,582.783506,56578800,-0.011412,0.006387,1.082448,590.934033,593.797940,593.464607,590.600700,0


In [None]:
# Target: tercile (0 = low, 1 = mid, 2 = high) of the 10-row rolling return
# volatility as it will stand on the following row.
# NOTE(review): the label window shares nine of its ten returns with data already
# known on the current row, and the tercile cut-offs are taken over the full
# sample (including rows later used for testing) — confirm both are intended.
real_vol = df["PCT_return"].rolling(window=10).std()
future_vol = real_vol.shift(periods=-1)
q = future_vol.quantile([1/3, 2/3]).values
# Rows without a defined next-row volatility stay NaN so they can be dropped later.
df["target_cls"] = np.where(
    future_vol.isna(),
    np.nan,
    np.where(future_vol <= q[0], 0.0, np.where(future_vol <= q[1], 1.0, 2.0)),
)

In [None]:
# Inspect the frame after the target_cls column has been added.
df

Price,Close,High,Low,Open,Volume,PCT_return,3day_volatility,volume_norm,fast_sma,slow_sma,Slow_Slope,Fast_Slope,DOW-T,target_cls
Ticker,SPY,SPY,SPY,SPY,SPY,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2
2022-01-03,453.210419,453.343253,449.548394,451.872728,72668200,,,,,,,,0,
2022-01-04,453.058594,455.363992,451.189625,454.642960,71178700,-0.000335,,,,,,,1,
2022-01-05,444.358917,453.466583,444.264040,452.688630,104538900,-0.019202,,,,,,,2,
2022-01-06,443.941437,446.673739,441.560154,443.894013,86858900,-0.000940,0.010723,,,,,,3,
2022-01-07,442.186310,445.136827,440.820159,443.950934,85111600,-0.003954,0.009791,,447.351135,,,,4,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-24,596.076904,596.116596,590.297529,590.882430,33160100,0.011115,0.003250,1.520194,585.982861,594.363736,594.030403,585.649528,1,2.0
2024-12-26,596.116699,597.246751,592.885006,594.292655,41219100,0.000067,0.005529,0.931231,589.358618,594.497806,594.164472,589.025285,3,2.0
2024-12-27,589.841614,592.587572,585.628530,592.349606,64969300,-0.010527,0.010822,0.951773,591.514941,594.408032,594.074699,591.181608,4,2.0
2024-12-30,583.110596,586.600040,579.333692,582.783506,56578800,-0.011412,0.006387,1.082448,590.934033,593.797940,593.464607,590.600700,0,2.0


In [None]:
# The five model inputs, in the column order handed to the classifiers.
features = ['3day_volatility', 'volume_norm', 'Slow_Slope', 'Fast_Slope', 'DOW-T']
# Keep only inputs plus label, and discard every row where any of them is NaN
# (rolling-window warm-up rows and rows without a label).
training_df = df[[*features, 'target_cls']].dropna(axis=0, how='any')
training_df

Price,3day_volatility,volume_norm,Slow_Slope,Fast_Slope,DOW-T,target_cls
Ticker,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2022-02-03,0.018385,0.920992,428.970416,426.352934,3,2.0
2022-02-04,0.017908,0.897985,428.057756,427.633689,4,2.0
2022-02-07,0.014550,0.814718,427.164543,427.130882,0,2.0
2022-02-08,0.005857,0.731445,426.473403,426.749491,1,2.0
2022-02-09,0.009040,0.660152,425.894216,426.785545,2,2.0
...,...,...,...,...,...,...
2024-12-23,0.006160,1.872330,593.744200,585.840446,0,2.0
2024-12-24,0.003250,1.520194,594.030403,585.649528,1,2.0
2024-12-26,0.005529,0.931231,594.164472,589.025285,3,2.0
2024-12-27,0.010822,0.951773,594.074699,591.181608,4,2.0


**Model #1 Data Split (Logistic Regression)**

In [None]:
# Feature matrix and class labels taken from the cleaned training frame.
X = training_df[features]
Y = training_df['target_cls']
# Chronological hold-out: the first 80% of rows train the model and the last 20%
# test it. The features and the label are rolling-window statistics, so adjacent
# rows are nearly identical; a shuffled split would place those near-duplicates
# on both sides and inflate the test score. shuffle=False keeps row order
# (stratify must be left unset in that mode).
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, shuffle=False)

**Model #1 Training and Analysis (Logistic Regression)**

In [None]:
# Multiclass logistic regression on standardised inputs.
# The features live on very different scales, which kept lbfgs from converging
# within max_iter; the scaler is fitted on the training rows only (inside the
# pipeline) so no test-set statistics leak into training.
# multi_class is left at its default: with lbfgs and three classes that is the
# multinomial formulation, and the explicit argument is deprecated in recent
# scikit-learn releases.
log_reg = make_pipeline(
    StandardScaler(),
    LogisticRegression(max_iter=1000, solver='lbfgs'),
)
log_reg.fit(X_train, y_train)
y_pred = log_reg.predict(X_test)
# Overall hit rate on the held-out rows, followed by the per-class breakdown.
accuracy = log_reg.score(X_test, y_test)
print(accuracy)
print(classification_report(y_test, y_pred))







0.5
              precision    recall  f1-score   support

         0.0       0.53      0.62      0.57        50
         1.0       0.30      0.27      0.28        48
         2.0       0.66      0.60      0.63        48

    accuracy                           0.50       146
   macro avg       0.50      0.50      0.50       146
weighted avg       0.50      0.50      0.50       146



STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


**Model #2 Training (XGBoost)**

**Model #2 Analysis (XGBoost)**