### Imports

In [1]:
# Remove unwanted warnings
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=RuntimeWarning)

# Data Management
import polars as pl
import pandas as pd
from pandas_datareader.data import DataReader
from ta import add_all_ta_features
import yfinance as yf


# Statistics 
from statsmodels.tsa.stattools import adfuller

# Unsupervised Machine Learning
from sklearn.decomposition import PCA # Principle Component Analysis

# Supervised Machine Learning
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score

# Reporting
import matplotlib.pyplot as plt



### Initial Data Extraction

In [2]:
# Data Extraction
import yfinance as yf

start_date = '2017-01-01'
end_date = '2022-06-01'
symbol = '^VIX'
df_pd = yf.download(symbol, start=start_date, end=end_date)
df_pd.head()


[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-01-03,14.07,14.07,12.85,12.85,12.85,0
2017-01-04,12.78,12.8,11.63,11.85,11.85,0
2017-01-05,11.96,12.09,11.4,11.67,11.67,0
2017-01-06,11.7,11.74,10.98,11.32,11.32,0
2017-01-09,11.71,12.08,11.46,11.56,11.56,0


In [3]:
# Convert to Polars DataFrame

# Reset the index to make the date a column
df_pd.reset_index(inplace=True)

df = pl.from_pandas(df_pd)
print(df.head)

<bound method DataFrame.head of shape: (1_362, 7)
┌─────────────────────┬───────────┬───────────┬───────────┬───────────┬───────────┬────────┐
│ Date                ┆ Open      ┆ High      ┆ Low       ┆ Close     ┆ Adj Close ┆ Volume │
│ ---                 ┆ ---       ┆ ---       ┆ ---       ┆ ---       ┆ ---       ┆ ---    │
│ datetime[ns]        ┆ f64       ┆ f64       ┆ f64       ┆ f64       ┆ f64       ┆ i64    │
╞═════════════════════╪═══════════╪═══════════╪═══════════╪═══════════╪═══════════╪════════╡
│ 2017-01-03 00:00:00 ┆ 14.07     ┆ 14.07     ┆ 12.85     ┆ 12.85     ┆ 12.85     ┆ 0      │
│ 2017-01-04 00:00:00 ┆ 12.78     ┆ 12.8      ┆ 11.63     ┆ 11.85     ┆ 11.85     ┆ 0      │
│ 2017-01-05 00:00:00 ┆ 11.96     ┆ 12.09     ┆ 11.4      ┆ 11.67     ┆ 11.67     ┆ 0      │
│ 2017-01-06 00:00:00 ┆ 11.7      ┆ 11.74     ┆ 10.98     ┆ 11.32     ┆ 11.32     ┆ 0      │
│ 2017-01-09 00:00:00 ┆ 11.71     ┆ 12.08     ┆ 11.46     ┆ 11.56     ┆ 11.56     ┆ 0      │
│ …                 

In [4]:
df.schema


OrderedDict([('Date', Datetime(time_unit='ns', time_zone=None)),
             ('Open', Float64),
             ('High', Float64),
             ('Low', Float64),
             ('Close', Float64),
             ('Adj Close', Float64),
             ('Volume', Int64)])

In [5]:
# Convert the 'Date' column to the desired timezone (New York)
df = df.with_columns(
    pl.col('Date').dt.replace_time_zone('America/New_York')
)

print(df.head())

shape: (5, 7)
┌────────────────────────────────┬───────┬───────┬───────┬───────┬───────────┬────────┐
│ Date                           ┆ Open  ┆ High  ┆ Low   ┆ Close ┆ Adj Close ┆ Volume │
│ ---                            ┆ ---   ┆ ---   ┆ ---   ┆ ---   ┆ ---       ┆ ---    │
│ datetime[ns, America/New_York] ┆ f64   ┆ f64   ┆ f64   ┆ f64   ┆ f64       ┆ i64    │
╞════════════════════════════════╪═══════╪═══════╪═══════╪═══════╪═══════════╪════════╡
│ 2017-01-03 00:00:00 EST        ┆ 14.07 ┆ 14.07 ┆ 12.85 ┆ 12.85 ┆ 12.85     ┆ 0      │
│ 2017-01-04 00:00:00 EST        ┆ 12.78 ┆ 12.8  ┆ 11.63 ┆ 11.85 ┆ 11.85     ┆ 0      │
│ 2017-01-05 00:00:00 EST        ┆ 11.96 ┆ 12.09 ┆ 11.4  ┆ 11.67 ┆ 11.67     ┆ 0      │
│ 2017-01-06 00:00:00 EST        ┆ 11.7  ┆ 11.74 ┆ 10.98 ┆ 11.32 ┆ 11.32     ┆ 0      │
│ 2017-01-09 00:00:00 EST        ┆ 11.71 ┆ 12.08 ┆ 11.46 ┆ 11.56 ┆ 11.56     ┆ 0      │
└────────────────────────────────┴───────┴───────┴───────┴───────┴───────────┴────────┘


In [6]:
# Add TA
# Convert back to Pandas DataFrame
df_pd = df.to_pandas()

# Add technical analysis features
df_pd = add_all_ta_features(df_pd, open='Open', high='High', low='Low', close='Adj Close', volume='Volume', fillna=True)

# Convert back to Polars Dataframe, remember need to have the pyarrow module installed
df = pl.from_pandas(df_pd)

In [7]:
df

Date,Open,High,Low,Close,Adj Close,Volume,volume_adi,volume_obv,volume_cmf,volume_fi,volume_em,volume_sma_em,volume_vpt,volume_vwap,volume_mfi,volume_nvi,volatility_bbm,volatility_bbh,volatility_bbl,volatility_bbw,volatility_bbp,volatility_bbhi,volatility_bbli,volatility_kcc,volatility_kch,volatility_kcl,volatility_kcw,volatility_kcp,volatility_kchi,volatility_kcli,volatility_dcl,volatility_dch,volatility_dcm,volatility_dcw,volatility_dcp,volatility_atr,…,trend_ichimoku_a,trend_ichimoku_b,trend_stc,trend_adx,trend_adx_pos,trend_adx_neg,trend_cci,trend_visual_ichimoku_a,trend_visual_ichimoku_b,trend_aroon_up,trend_aroon_down,trend_aroon_ind,trend_psar_up,trend_psar_down,trend_psar_up_indicator,trend_psar_down_indicator,momentum_rsi,momentum_stoch_rsi,momentum_stoch_rsi_k,momentum_stoch_rsi_d,momentum_tsi,momentum_uo,momentum_stoch,momentum_stoch_signal,momentum_wr,momentum_ao,momentum_roc,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,momentum_pvo,momentum_pvo_signal,momentum_pvo_hist,momentum_kama,others_dr,others_dlr,others_cr
"datetime[ns, America/New_York]",f64,f64,f64,f64,f64,i64,f64,i64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,…,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2017-01-03 00:00:00 EST,14.07,14.07,12.85,12.85,12.85,0,-0.0,0,0.0,0.0,0.0,0.0,0.0,0.0,50.0,1000.0,12.85,12.85,12.85,0.0,0.0,0.0,0.0,13.256667,14.476666,12.036668,18.405823,0.333333,0.0,0.0,12.85,14.07,13.46,9.494158,0.0,0.0,…,13.46,13.46,0.0,0.0,0.0,0.0,0.0,20.47047,22.23312,0.0,0.0,0.0,10.94,14.07,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12.85,0.0,0.0,0.0
2017-01-04 00:00:00 EST,12.78,12.8,11.63,11.85,11.85,0,-0.0,0,0.0,-0.0,0.0,0.0,0.0,0.0,50.0,1000.0,12.35,13.35,11.35,16.194331,0.25,0.0,0.0,12.675,13.87,11.48,18.856011,0.154812,0.0,0.0,11.63,14.07,12.85,19.757081,0.090164,0.0,…,12.85,12.85,0.0,0.0,0.0,0.0,-66.666667,20.47047,22.23312,0.0,4.0,-4.0,10.94,14.07,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,9.016406,9.016406,4.508203,-90.983594,0.0,0.0,-0.624394,-0.124879,-0.499515,0.0,0.0,0.0,12.098375,-7.782101,-8.101594,-7.782101
2017-01-05 00:00:00 EST,11.96,12.09,11.4,11.67,11.67,0,-0.0,0,0.0,-0.0,0.0,0.0,0.0,0.0,50.0,1000.0,12.123334,13.161452,11.085215,17.125952,0.281656,0.0,0.0,12.356667,13.383333,11.33,16.61721,0.165584,0.0,0.0,11.4,14.07,12.735,22.023646,0.101124,0.0,…,12.735,12.735,0.0,0.0,0.0,0.0,-70.740755,20.47047,22.23312,0.0,8.0,-8.0,10.94,14.07,0.0,1.0,0.0,0.0,0.0,0.0,-100.0,15.654975,10.112376,6.376261,-89.887624,0.0,0.0,-1.226732,-0.345249,-0.881483,0.0,0.0,0.0,11.82578,-1.51899,-1.530645,-9.182881
2017-01-06 00:00:00 EST,11.7,11.74,10.98,11.32,11.32,0,-0.0,0,0.0,-0.0,0.0,0.0,0.0,0.0,50.0,1000.0,11.9225,13.059283,10.785717,19.069543,0.234998,0.0,0.0,12.104167,13.064167,11.144167,15.862307,0.09158,0.0,0.0,10.98,14.07,12.525,25.917384,0.110032,0.0,…,12.525,12.525,0.0,0.0,0.0,0.0,-87.635601,20.47047,22.23312,0.0,12.0,-12.0,10.94,14.0166,0.0,0.0,0.0,0.0,0.0,0.0,-100.0,21.336782,11.003241,10.044008,-88.996759,0.0,0.0,-1.916831,-0.659566,-1.257265,0.0,0.0,0.0,11.503803,-2.999146,-3.045041,-11.90662
2017-01-09 00:00:00 EST,11.71,12.08,11.46,11.56,11.56,0,-0.0,0,0.0,-0.0,0.0,0.0,0.0,0.0,50.0,1000.0,11.85,12.907318,10.792683,17.845024,0.362861,0.0,0.0,12.023333,12.915333,11.131333,14.837815,0.240284,0.0,0.0,10.98,14.07,12.525,26.07595,0.187703,0.0,…,12.525,12.525,0.0,0.0,0.0,0.0,-41.34695,20.47047,22.23312,0.0,12.0,-12.0,10.94,13.895136,0.0,0.0,15.780578,0.0,0.0,0.0,-99.449116,23.010784,18.770254,13.29529,-81.229746,0.0,0.0,-2.289756,-0.985604,-1.304152,0.0,0.0,0.0,11.534477,2.120148,2.097985,-10.03891
…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…,…
2022-05-24 00:00:00 EDT,29.43,31.07,29.040001,29.450001,29.450001,0,0.0,0,0.0,-0.0,0.0,0.0,0.0,0.0,50.0,1000.0,30.2785,34.956497,25.600503,30.899793,0.411447,0.0,0.0,29.862,33.064,26.660001,21.445312,0.435665,0.0,0.0,24.940001,36.639999,30.79,38.641276,0.38547,3.689493,…,29.164999,27.545,0.387715,19.594083,25.358799,18.759503,-29.457321,24.0525,28.120001,36.0,4.0,32.0,24.82736,33.75761,0.0,0.0,51.582605,0.365315,0.324785,0.339286,4.913171,38.043738,39.518564,38.568173,-60.481436,2.148147,-2.451142,2.457774,3.929137,-1.471363,0.0,0.0,0.0,27.349282,3.405903,3.349187,129.182879
2022-05-25 00:00:00 EDT,29.33,30.23,28.16,28.370001,28.370001,0,0.0,0,0.0,-0.0,0.0,0.0,0.0,0.0,50.0,1000.0,30.117,34.824285,25.409716,31.259983,0.314437,0.0,0.0,29.499333,32.538333,26.460334,20.603852,0.314193,0.0,0.0,24.940001,36.639999,30.79,38.848487,0.293162,3.527544,…,28.7525,27.545,0.193858,18.93692,24.315459,19.73676,-61.248668,24.485001,28.120001,32.0,0.0,32.0,24.82736,33.581258,0.0,0.0,49.473719,0.226121,0.279651,0.306363,4.076534,36.850185,28.686066,33.930321,-71.313934,2.035235,-18.35971,1.98724,3.540757,-1.553518,0.0,0.0,0.0,27.399636,-3.667232,-3.736166,120.77821
2022-05-26 00:00:00 EDT,28.42,28.459999,27.110001,27.5,27.5,0,0.0,0,0.0,-0.0,0.0,0.0,0.0,0.0,50.0,1000.0,29.9925,34.836367,25.148634,32.300516,0.242716,0.0,0.0,28.994,31.862,26.126,19.783401,0.23954,0.0,0.0,24.940001,36.639999,30.79,39.009748,0.218803,3.309789,…,28.7675,27.545,0.096929,17.943926,23.632561,21.366839,-99.693957,24.3525,28.120001,28.0,0.0,28.0,24.82736,33.408433,0.0,0.0,47.779114,0.114271,0.235236,0.279891,2.9048,37.911631,19.959879,29.38817,-80.040121,1.218912,-16.641411,1.356447,3.103895,-1.747448,0.0,0.0,0.0,27.404686,-3.066623,-3.114627,114.007776
2022-05-27 00:00:00 EDT,27.5,27.540001,25.57,25.719999,25.719999,0,0.0,0,0.0,-0.0,0.0,0.0,0.0,0.0,50.0,1000.0,29.6085,34.528038,24.688962,33.23058,0.10479,0.0,0.0,28.66,31.483,25.837,19.699929,-0.020723,0.0,1.0,24.940001,36.639999,30.79,39.515675,0.066667,3.17581,…,29.47,27.545,0.048464,16.836844,22.633608,23.768028,-136.634842,23.345,28.120001,24.0,0.0,24.0,24.82736,33.239064,0.0,0.0,44.426229,0.0,0.113464,0.20945,0.944023,25.257187,2.250794,16.96558,-97.749206,0.278647,-21.007376,0.348046,2.552725,-2.20468,0.0,0.0,0.0,27.343084,-6.47273,-6.691713,100.155631
