In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import plotly.graph_objects as go
from plotly.subplots import make_subplots
from utils.utils import plot_2axes
import random

# Check for Stationarity

In [10]:
df = yf.download('SPY', start='2000-01-01')
df

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-03,148.250000,148.250000,143.875000,145.437500,93.881950,8164300
2000-01-04,143.531250,144.062500,139.640625,139.750000,90.210556,8089800
2000-01-05,139.937500,141.531250,137.250000,140.000000,90.371933,12177900
2000-01-06,139.625000,141.500000,137.750000,137.750000,88.919563,6227200
2000-01-07,140.312500,145.750000,140.062500,145.750000,94.083687,8066500
...,...,...,...,...,...,...
2023-12-22,473.859985,475.380005,471.700012,473.649994,473.649994,67126600
2023-12-26,474.070007,476.579987,473.989990,475.649994,475.649994,55387000
2023-12-27,475.440002,476.660004,474.890015,476.510010,476.510010,68000300
2023-12-28,476.880005,477.549988,476.260010,476.690002,476.690002,77158100


In [25]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['Adj Close'], name='Adj Close'))
fig.show()

In [33]:
logreturns = np.log(df['Adj Close']).diff().dropna()
fig = go.Figure()
fig.add_trace(go.Scatter(x=logreturns.index, y=logreturns, name='Log-Returns'))
fig.show()

In [15]:
from statsmodels.tsa.stattools import adfuller

In [21]:
def adf_test(series):
    print ('Results of Dickey-Fuller Test:')
    dftest = adfuller(series, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print (dfoutput)
# Call the function and run the test

adf_test(logreturns)

Results of Dickey-Fuller Test:
Test Statistic                  -19.282110
p-value                           0.000000
#Lags Used                       15.000000
Number of Observations Used    6020.000000
Critical Value (1%)              -3.431437
Critical Value (5%)              -2.862020
Critical Value (10%)             -2.567026
dtype: float64


# Examine Conditions

In [2]:
from dataLoader import *
real_data = load_dataset(data_mode='Test')




[*********************100%%**********************]  1 of 1 completed


X_train's shape is (5680, 1, 1, 2), X_test's shape is (628, 1, 1, 2)
y_train's label shape is (5680, 1, 1, 43), y_test's label shape is (628, 1, 1, 43)


In [3]:
real_data.df

Unnamed: 0_level_0,close,rates,logreturns,vix,ivol,pc_ratio,rsi,rsi_z,vix_z,ivol_z,pc_ratio_z
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
1999-01-19,79.702530,4.41,0.006512,29.240000,,,56.546583,0.294196,1.610980,,
1999-01-20,80.339142,4.37,0.007956,28.600000,,,55.447436,0.240476,1.422281,,
1999-01-21,78.210335,4.32,-0.026855,30.920000,,,35.999866,-0.941582,2.087318,,
1999-01-22,78.031235,4.36,-0.002293,31.950001,,,36.770174,-0.883640,2.366723,,
1999-01-25,78.827087,4.44,0.010147,31.129999,,,37.977187,-0.795247,2.145031,,
...,...,...,...,...,...,...,...,...,...,...,...
2024-04-08,518.719971,5.43,0.000559,15.190000,13.1626,1.5455,44.735586,-0.629377,0.108656,0.144565,1.648083
2024-04-09,519.320007,5.43,0.001156,14.980000,13.1329,1.3826,49.013558,-0.383138,0.013958,0.125363,0.156899
2024-04-10,514.119995,5.45,-0.010064,15.800000,13.7405,1.3688,41.330821,-0.828173,0.412373,0.497369,0.030769
2024-04-11,518.000000,5.45,0.007519,14.910000,13.1092,1.3719,40.270972,-0.890071,-0.009164,0.105352,0.066827


## Granger causality tests

In [4]:
from statsmodels.tsa.stattools import grangercausalitytests

def granger_causation_matrix(data, treatment_vars, outcome_vars=['close','logreturns'], test='ssr_chi2test', verbose=False, maxlag=42):    
    """Check Granger Causality of all possible combinations of the Time series.
    The rows are the response variable, columns are predictors. The values in the table 
    are the P-Values. P-Values lesser than the significance level (0.05), implies 
    the Null Hypothesis that the coefficients of the corresponding past values is 
    zero, that is, the X does not cause Y can be rejected.

    data      : pandas dataframe containing the time series variables
    variables : list containing names of the time series variables.
    """
    df_min = pd.DataFrame(np.zeros((len(outcome_vars), len(treatment_vars))), columns=treatment_vars, index=outcome_vars)
    df_mean = pd.DataFrame(np.zeros((len(outcome_vars), len(treatment_vars))), columns=treatment_vars, index=outcome_vars)
    for treatment in treatment_vars:
        for outcome in outcome_vars:
            test_result = grangercausalitytests(data[[outcome, treatment]].dropna(), maxlag=maxlag, verbose=False)
            p_values = [round(test_result[i+1][0][test][1],4) for i in range(maxlag)]
            if verbose: print(f'X = {treatment}, Y = {outcome}, P Values = {p_values}')
            min_p_value = np.min(p_values)
            mean_p_value = np.mean(p_values)
            df_min.loc[outcome, treatment] = min_p_value
            df_mean.loc[outcome, treatment] = mean_p_value
    df_min.columns, df_mean.columns = treatment_vars, treatment_vars
    df_min.index, df_mean.index =outcome_vars, outcome_vars

    print()
    print("Min p-values")
    display(df_min)
    print("Mean p-values")
    display(df_mean)

In [7]:
conditions = ['rsi','vix','ivol'] + real_data.condition_names
if 'vix_z' not in conditions: conditions += ['vix_z']
if 'ivol_z' not in conditions: conditions += ['ivol_z']
granger_causation_matrix(real_data.df, treatment_vars=conditions, verbose=True)

X = rsi, Y = close, P Values = [0.0658, 0.4001, 0.3962, 0.5954, 0.6703, 0.7371, 0.7853, 0.1464, 0.2024, 0.0219, 0.0385, 0.0332, 0.0226, 0.0179, 0.0138, 0.0115, 0.0036, 0.0015, 0.0012, 0.0009, 0.0004, 0.0006, 0.0018, 0.0024, 0.0024, 0.0054, 0.016, 0.0351, 0.0375, 0.0513, 0.0956, 0.0696, 0.102, 0.1077, 0.0997, 0.1372, 0.1019, 0.1159, 0.1199, 0.0844, 0.0915, 0.1407]
X = rsi, Y = logreturns, P Values = [0.2536, 0.2395, 0.4234, 0.6059, 0.4069, 0.1817, 0.2595, 0.0539, 0.154, 0.077, 0.0894, 0.0379, 0.048, 0.0466, 0.0578, 0.0043, 0.0017, 0.0007, 0.0004, 0.0003, 0.0003, 0.0004, 0.0007, 0.0009, 0.001, 0.0016, 0.0007, 0.0008, 0.0007, 0.0005, 0.0003, 0.0004, 0.0007, 0.0001, 0.0001, 0.0002, 0.0003, 0.0003, 0.0005, 0.0006, 0.0007, 0.0008]
X = vix, Y = close, P Values = [0.0692, 0.0129, 0.0155, 0.0005, 0.002, 0.0026, 0.0039, 0.0012, 0.0009, 0.0007, 0.0005, 0.0006, 0.0001, 0.0002, 0.0004, 0.0003, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0

Unnamed: 0,rsi,vix,ivol,rsi_z,vix_z,ivol_z
close,0.0004,0.0,0.0,0.0079,0.0,0.0004
logreturns,0.0001,0.0,0.0,0.0017,0.0,0.0


Mean p-values


Unnamed: 0,rsi,vix,ivol,rsi_z,vix_z,ivol_z
close,0.132967,0.002655,0.004281,0.154393,0.020071,0.048033
logreturns,0.07036,0.005145,0.01154,0.067293,0.022767,0.004626


`'rsi', 'vix', 'ivol'` are the strongest features. <br>
Hence, we choose these conditions: `['rsi_z', 'vix_z', 'ivol_z']`

## VIX vs IVol

In [8]:
plot_2axes(real_data.df, 'vix', 'ivol')

In [12]:
np.corrcoef(real_data.df.ivol, real_data.df.vix)

array([[1.        , 0.98844867],
       [0.98844867, 1.        ]])

In [9]:
np.corrcoef(real_data.df.ivol_z, real_data.df.vix_z)

array([[1.        , 0.95625822],
       [0.95625822, 1.        ]])

In [10]:
granger_causation_matrix(real_data.df, treatment_vars=['ivol_z','vix_z'], outcome_vars=['ivol_z','vix_z'], verbose=True)



X = ivol_z, Y = ivol_z, P Values = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]




X = ivol_z, Y = vix_z, P Values = [0.0806, 0.2716, 0.492, 0.5892, 0.6427, 0.5636, 0.6187, 0.4636, 0.5146, 0.2948, 0.21, 0.2859, 0.1615, 0.1777, 0.0354, 0.034, 0.0434, 0.0287, 0.0369, 0.0079, 0.0099, 0.0132, 0.0135, 0.0112, 0.0151, 0.0145, 0.0202, 0.0101, 0.0122, 0.015, 0.0177, 0.0161, 0.0229, 0.0225, 0.026, 0.02, 0.0165, 0.0192, 0.0207, 0.0261, 0.0264, 0.0396]




X = vix_z, Y = ivol_z, P Values = [0.8554, 0.0004, 0.0015, 0.0039, 0.0054, 0.0061, 0.0122, 0.0031, 0.006, 0.0026, 0.0047, 0.0077, 0.0093, 0.0097, 0.0057, 0.0072, 0.0063, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0002, 0.0001, 0.0002, 0.0003, 0.0005, 0.0003, 0.0003, 0.0004, 0.0006, 0.0005, 0.0006, 0.0006, 0.0009, 0.001, 0.0012, 0.0013, 0.0024, 0.0032, 0.0043, 0.0066]




X = vix_z, Y = vix_z, P Values = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

Min p-values


Unnamed: 0,ivol_z,vix_z
ivol_z,1.0,0.0001
vix_z,0.0079,1.0


Mean p-values


Unnamed: 0,ivol_z,vix_z
ivol_z,1.0,0.023174
vix_z,0.141938,1.0


In [11]:
granger_causation_matrix(real_data.df, treatment_vars=['ivol','vix'], outcome_vars=['ivol','vix'], verbose=True)



X = ivol, Y = ivol, P Values = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]




X = ivol, Y = vix, P Values = [0.0, 0.0063, 0.0005, 0.0001, 0.0006, 0.0008, 0.0022, 0.0003, 0.0001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]




X = vix, Y = ivol, P Values = [0.8907, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]




X = vix, Y = vix, P Values = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

Min p-values


Unnamed: 0,ivol,vix
ivol,1.0,0.0
vix,0.0,1.0


Mean p-values


Unnamed: 0,ivol,vix
ivol,1.0,0.02121
vix,0.00026,1.0


## Check if normalised follow actual trend

In [12]:
plot_2axes(real_data.df, 'rsi', 'rsi_z')

In [4]:
plot_2axes(real_data.df, 'vix', 'vix_z')

In [14]:
plot_2axes(real_data.df, 'ivol', 'ivol_z')

In [15]:
plot_2axes(real_data.df, 'pc_ratio', 'pc_ratio_z')