In [70]:
import yfinance as yf
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [88]:
# Ticker handles for the two instruments used throughout the notebook.
spyTicker = yf.Ticker("SPY")   # SPDR S&P 500 ETF Trust
vixTicker = yf.Ticker("^VIX")  # VIX index
# Fetch the longest history available for each handle (SPY first, then VIX).
df_spy, df_vix = (
    handle.history(period="max") for handle in (spyTicker, vixTicker)
)

In [89]:
# Explore dataset
# Reduce the SPY index to plain calendar dates (datetime.date objects).
# Going through pd.to_datetime keeps this cell re-runnable: once the index
# already holds date objects it no longer exposes a `.date` attribute, so the
# bare `df_spy.index.date` raises AttributeError on a second execution.
df_spy.index = pd.to_datetime(df_spy.index).date

In [64]:
# Display the SPY history frame.
# NOTE(review): this cell's execution count (64) is lower than that of the
# download cell (88), so the rendered output may come from an earlier fetch --
# re-run after the download cell to confirm.
df_spy

Unnamed: 0,Open,High,Low,Close,Volume,Dividends,Stock Splits,Capital Gains
1993-01-29,24.330323,24.330323,24.209276,24.313030,1003200,0.0,0.0,0.0
1993-02-01,24.330333,24.485964,24.330333,24.485964,480500,0.0,0.0,0.0
1993-02-02,24.468680,24.555142,24.416803,24.537849,201300,0.0,0.0,0.0
1993-02-03,24.572418,24.814511,24.555126,24.797218,529400,0.0,0.0,0.0
1993-02-04,24.883684,24.952853,24.607006,24.900976,531500,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...
2025-11-10,677.239990,682.179993,675.030029,681.440002,75842900,0.0,0.0,0.0
2025-11-11,679.950012,683.570007,678.729980,683.000000,58953400,0.0,0.0,0.0
2025-11-12,684.789978,684.960022,680.950012,683.380005,62312500,0.0,0.0,0.0
2025-11-13,680.500000,680.859985,670.520020,672.039978,103457800,0.0,0.0,0.0


In [90]:
# Remove the VIX rows labelled from 1990 through 1993-01-28 (inclusive);
# the SPY frame displayed above starts on 1993-01-29.
vix_rows_to_discard = df_vix.loc['1990':'1993-01-28'].index
df_vix = df_vix.drop(index=vix_rows_to_discard)

In [91]:
# Reduce the VIX index to plain calendar dates (datetime.date objects).
# Going through pd.to_datetime keeps this cell re-runnable: once the index
# already holds date objects it no longer exposes a `.date` attribute, so the
# bare `df_vix.index.date` raises AttributeError on a second execution.
df_vix.index = pd.to_datetime(df_vix.index).date

In [92]:
# Join the two frames on their date index, keeping only dates present in both.
# Column names that occur in both frames receive the _SPY / _VIX suffix;
# columns unique to one frame keep their original name.
dataset = df_spy.join(df_vix, how="inner", lsuffix='_SPY', rsuffix='_VIX')



In [93]:
# Display the merged SPY + VIX frame.
dataset

Unnamed: 0,Open_SPY,High_SPY,Low_SPY,Close_SPY,Volume_SPY,Dividends_SPY,Stock Splits_SPY,Capital Gains,Open_VIX,High_VIX,Low_VIX,Close_VIX,Volume_VIX,Dividends_VIX,Stock Splits_VIX
1993-01-29,24.330323,24.330323,24.209276,24.313030,1003200,0.0,0.0,0.0,12.490000,13.160000,12.420000,12.420000,0,0.0,0.0
1993-02-01,24.330335,24.485966,24.330335,24.485966,480500,0.0,0.0,0.0,12.510000,12.920000,12.180000,12.330000,0,0.0,0.0
1993-02-02,24.468667,24.555128,24.416790,24.537836,201300,0.0,0.0,0.0,12.470000,12.890000,12.220000,12.250000,0,0.0,0.0
1993-02-03,24.572412,24.814505,24.555120,24.797213,529400,0.0,0.0,0.0,11.980000,12.340000,11.790000,12.120000,0,0.0,0.0
1993-02-04,24.883678,24.952847,24.607001,24.900970,531500,0.0,0.0,0.0,11.860000,12.840000,11.690000,12.290000,0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-11-10,677.239990,682.179993,675.030029,681.440002,75842900,0.0,0.0,0.0,18.580000,18.820000,17.600000,17.600000,0,0.0,0.0
2025-11-11,679.950012,683.570007,678.729980,683.000000,58953400,0.0,0.0,0.0,17.900000,18.010000,17.250000,17.280001,0,0.0,0.0
2025-11-12,684.789978,684.960022,680.950012,683.380005,62312500,0.0,0.0,0.0,17.209999,18.059999,17.100000,17.510000,0,0.0,0.0
2025-11-13,680.500000,680.859985,670.520020,672.039978,103457800,0.0,0.0,0.0,17.610001,21.309999,17.510000,20.000000,0,0.0,0.0


In [95]:
# SPY features
# All columns below are derived from the SPY close and volume columns and are
# written back onto `dataset` in a fixed order.
spy_close = dataset["Close_SPY"]

# Log price, and the daily log-return as its first difference.
dataset["log_close_SPY"] = np.log(spy_close)
dataset["ret_1d"] = dataset["log_close_SPY"].diff()
spy_ret = dataset["ret_1d"]

# Daily return shifted back by one and by two rows.
dataset["ret_lag1"] = spy_ret.shift(1)
dataset["ret_lag2"] = spy_ret.shift(2)

# Rolling mean and standard deviation of the daily return.
ret_window = 10
ret_rolling = spy_ret.rolling(window=ret_window)
dataset["ret_roll_mean_10"] = ret_rolling.mean()
dataset["ret_roll_std_10"] = ret_rolling.std()

# Simple moving averages of the close over 5, 10 and 20 rows.
dataset["ma_5"] = spy_close.rolling(5).mean()
dataset["ma_10"] = spy_close.rolling(10).mean()
dataset["ma_20"] = spy_close.rolling(20).mean()

# Rate of change of the close over 5 and 10 rows.
dataset["roc_5"] = spy_close.pct_change(5)
dataset["roc_10"] = spy_close.pct_change(10)

# Log-volume (zero volume becomes NaN before the log) and its 10-row mean.
nonzero_volume = dataset["Volume_SPY"].replace(0, np.nan)
dataset["log_volume"] = np.log(nonzero_volume)
dataset["vol_roll_mean_10"] = dataset["log_volume"].rolling(10).mean()

In [96]:
# Display the frame again, now including the derived SPY feature columns.
dataset

Unnamed: 0,Open_SPY,High_SPY,Low_SPY,Close_SPY,Volume_SPY,Dividends_SPY,Stock Splits_SPY,Capital Gains,Open_VIX,High_VIX,...,ret_lag2,ret_roll_mean_10,ret_roll_std_10,ma_5,ma_10,ma_20,roc_5,roc_10,log_volume,vol_roll_mean_10
1993-01-29,24.330323,24.330323,24.209276,24.313030,1003200,0.0,0.0,0.0,12.490000,13.160000,...,,,,,,,,,13.818705,
1993-02-01,24.330335,24.485966,24.330335,24.485966,480500,0.0,0.0,0.0,12.510000,12.920000,...,,,,,,,,,13.082583,
1993-02-02,24.468667,24.555128,24.416790,24.537836,201300,0.0,0.0,0.0,12.470000,12.890000,...,,,,,,,,,12.212552,
1993-02-03,24.572412,24.814505,24.555120,24.797213,529400,0.0,0.0,0.0,11.980000,12.340000,...,0.007088,,,,,,,,13.179500,
1993-02-04,24.883678,24.952847,24.607001,24.900970,531500,0.0,0.0,0.0,11.860000,12.840000,...,0.002116,,,24.607003,,,,,13.183458,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-11-10,677.239990,682.179993,675.030029,681.440002,75842900,0.0,0.0,0.0,18.580000,18.820000,...,-0.010787,-0.000556,0.008511,675.107996,679.522003,674.614499,-0.002780,-0.005545,18.144175,18.163432
2025-11-11,679.950012,683.570007,678.729980,683.000000,58953400,0.0,0.0,0.0,17.900000,18.010000,...,0.000984,-0.000593,0.008497,676.659998,679.116003,675.653000,0.011492,-0.005909,17.892258,18.158817
2025-11-12,684.789978,684.960022,680.950012,683.380005,62312500,0.0,0.0,0.0,17.209999,18.059999,...,0.015484,-0.000585,0.008498,677.819995,678.715002,676.563501,0.008560,-0.005834,17.947673,18.126998
2025-11-13,680.500000,680.859985,670.520020,672.039978,103457800,0.0,0.0,0.0,17.610001,21.309999,...,0.002287,-0.001152,0.009415,678.165991,677.935999,677.133499,0.002581,-0.011459,18.454674,18.157400
