In [25]:
# Data Management
import pandas as pd
import numpy as np
from ta import add_all_ta_features
from pandas_datareader.data import DataReader
import yfinance as yf

# Statistics
from statsmodels.tsa.stattools import adfuller

# Unsupervised Machine Learning
from sklearn.decomposition import PCA

# Supervised Machine Learning
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score

# Graphing
import matplotlib.pyplot as plt

In [26]:
# Remove Unwanted Warnings
import warnings
warnings.simplefilter(action="ignore", category=FutureWarning)
warnings.simplefilter(action="ignore", category=RuntimeWarning)
pd.options.mode.chained_assignment = None  # ignore column assingment tecnique warning (wants to use .iloc)

### Initial Data Extraction


In [27]:
# TIMEFRAME
start_date = "2017-01-01"
end_date = "2022-06-01"
symbol = "^VIX"

# FETCH USING YAHOO FINANCE
# The date window is fully defined by start/end, so no `period` (look-back range)
# is passed; `interval="1d"` is the argument that selects daily bars.
tickerData = yf.Ticker(symbol)
df = tickerData.history(interval="1d", start=start_date, end=end_date)

# Fail here, with a clear message, rather than deep inside the feature engineering
# when the download returned nothing (bad symbol, no network, empty date range).
if df.empty:
    raise ValueError(
        f"No price data returned for {symbol} between {start_date} and {end_date}"
    )

# REFINE COLUMNS
# .copy() makes df an independent frame, so later column assignments do not
# operate on a slice of the downloaded data.
df = df[["High", "Low", "Open", "Close", "Volume"]].copy()


## Feature Engineering

In [28]:
# Append the technical-analysis indicator columns from the `ta` package to the price frame
ohlcv_columns = {
    "open": "Open",
    "high": "High",
    "low": "Low",
    "close": "Close",
    "volume": "Volume",
}
df = add_all_ta_features(df, fillna=True, **ohlcv_columns)

# Report the resulting frame size
print(f"Rows (dates): {df.shape[0]}\nColumns (technical analysis): {df.shape[1]}")

Rows (dates): 1362
Columns (technical analysis): 91


In [29]:
# Sort every column into: constant (skipped) or non-stationary (collected).
# Columns that pass the stationarity check are left out of both lists.
non_stationaries = []
columns_with_constants = []

for col in df.columns:
    series = df[col]

    # A column with at most one distinct value is recorded and skipped
    if series.nunique() <= 1:
        columns_with_constants.append(col)
        print(f"Skipping constant column: {col}")
        continue

    # Run the Augmented Dickey-Fuller test on the raw values
    adf_result = adfuller(series.values)
    adf_statistic = adf_result[0]
    p_value = adf_result[1]
    critical_values = adf_result[4]

    # True when the test statistic lies below the 1% critical value
    t_test = adf_statistic < critical_values["1%"]

    # Flag the column unless both the p-value and the critical-value check pass
    if p_value > 0.05 or not t_test:
        non_stationaries.append(col)

print(f"Non-Stationaries found:  {len(non_stationaries)}")


Skipping constant column: Volume
Skipping constant column: volume_adi
Skipping constant column: volume_obv
Skipping constant column: volume_cmf
Skipping constant column: volume_fi
Skipping constant column: volume_em
Skipping constant column: volume_sma_em
Skipping constant column: volume_vpt
Skipping constant column: volume_vwap
Skipping constant column: volume_mfi
Skipping constant column: volume_nvi
Skipping constant column: momentum_pvo
Skipping constant column: momentum_pvo_signal
Skipping constant column: momentum_pvo_hist
Non-Stationaries found:  15


In [30]:
# Build a stationary version of the data on a copy, leaving `df` untouched
df_stationary = df.copy()

# Replace each non-stationary column with its row-over-row percent change
pct_changes = df_stationary[non_stationaries].pct_change()
df_stationary[non_stationaries] = pct_changes

# The first row has no previous row to compare with, so pct_change() leaves NaN there; drop it
df_stationary = df_stationary.iloc[1:]

In [31]:
# Discard every column that still contains at least one NaN value
has_nan = df_stationary.isna().any()
na_list = df_stationary.columns[has_nan.tolist()]
df_stationary = df_stationary.drop(columns=na_list)

In [32]:
# Replace positive and negative infinity with 0
infinite_values = [np.inf, -np.inf]
df_stationary = df_stationary.replace(to_replace=infinite_values, value=0)

# Show the final dimensions, then the frame itself
print(df_stationary.shape)
df_stationary

(1361, 91)


Unnamed: 0_level_0,High,Low,Open,Close,Volume,volume_adi,volume_obv,volume_cmf,volume_fi,volume_em,...,momentum_ppo,momentum_ppo_signal,momentum_ppo_hist,momentum_pvo,momentum_pvo_signal,momentum_pvo_hist,momentum_kama,others_dr,others_dlr,others_cr
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2017-01-04 00:00:00-06:00,12.800000,-0.094942,12.780000,11.850000,0,-0.0,0,0.0,-0.0,0.0,...,-0.624394,-0.124879,-0.499515,0.0,0.0,0.0,-0.058492,-7.782101,-8.101594,-7.782101
2017-01-05 00:00:00-06:00,12.090000,-0.019776,11.960000,11.670000,0,-0.0,0,0.0,-0.0,0.0,...,-1.226732,-0.345249,-0.881483,0.0,0.0,0.0,-0.022532,-1.518990,-1.530645,-9.182881
2017-01-06 00:00:00-06:00,11.740000,-0.036842,11.700000,11.320000,0,-0.0,0,0.0,-0.0,0.0,...,-1.916831,-0.659566,-1.257265,0.0,0.0,0.0,-0.027227,-2.999146,-3.045041,-11.906620
2017-01-09 00:00:00-06:00,12.080000,0.043716,11.710000,11.560000,0,-0.0,0,0.0,-0.0,0.0,...,-2.289756,-0.985604,-1.304152,0.0,0.0,0.0,0.002666,2.120148,2.097985,-10.038910
2017-01-10 00:00:00-06:00,11.790000,-0.013089,11.590000,11.490000,0,-0.0,0,0.0,-0.0,0.0,...,-2.607109,-1.309905,-1.297204,0.0,0.0,0.0,-0.002328,-0.605542,-0.607383,-10.583662
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2022-05-24 00:00:00-05:00,31.070000,0.026511,29.430000,29.450001,0,0.0,0,0.0,-0.0,0.0,...,2.457774,3.929137,-1.471363,0.0,0.0,0.0,0.003322,3.405903,3.349187,129.182879
2022-05-25 00:00:00-05:00,30.230000,-0.030303,29.330000,28.370001,0,0.0,0,0.0,-0.0,0.0,...,1.987240,3.540757,-1.553518,0.0,0.0,0.0,0.001841,-3.667232,-3.736166,120.778210
2022-05-26 00:00:00-05:00,28.459999,-0.037287,28.420000,27.500000,0,0.0,0,0.0,-0.0,0.0,...,1.356447,3.103895,-1.747448,0.0,0.0,0.0,0.000184,-3.066623,-3.114627,114.007776
2022-05-27 00:00:00-05:00,27.540001,-0.056806,27.500000,25.719999,0,0.0,0,0.0,-0.0,0.0,...,0.348046,2.552725,-2.204680,0.0,0.0,0.0,-0.002248,-6.472730,-6.691713,100.155631
