In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from utils.utils import plot_2axes
import random

# Check for Stationarity

In [2]:
df = yf.download('SPY', start='2000-01-01')
df

[*********************100%%**********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2000-01-03,148.250000,148.250000,143.875000,145.437500,93.591125,8164300
2000-01-04,143.531250,144.062500,139.640625,139.750000,89.931145,8089800
2000-01-05,139.937500,141.531250,137.250000,140.000000,90.092056,12177900
2000-01-06,139.625000,141.500000,137.750000,137.750000,88.644104,6227200
2000-01-07,140.312500,145.750000,140.062500,145.750000,93.792259,8066500
...,...,...,...,...,...,...
2024-05-03,511.160004,512.549988,508.559998,511.290009,511.290009,72756700
2024-05-06,513.750000,516.609985,513.299988,516.570007,516.570007,47264700
2024-05-07,517.559998,518.570007,516.450012,517.140015,517.140015,52561300
2024-05-08,515.260010,517.739990,515.140015,517.190002,517.190002,42047200


In [25]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=df.index, y=df['Adj Close'], name='Adj Close'))
fig.show()

In [33]:
logreturns = np.log(df['Adj Close']).diff().dropna()
fig = go.Figure()
fig.add_trace(go.Scatter(x=logreturns.index, y=logreturns, name='Log-Returns'))
fig.show()

In [15]:
from statsmodels.tsa.stattools import adfuller

In [21]:
def adf_test(series):
    print ('Results of Dickey-Fuller Test:')
    dftest = adfuller(series, autolag='AIC')
    dfoutput = pd.Series(dftest[0:4], index=['Test Statistic','p-value','#Lags Used','Number of Observations Used'])
    for key,value in dftest[4].items():
        dfoutput['Critical Value (%s)'%key] = value
    print (dfoutput)
# Call the function and run the test

adf_test(logreturns)

Results of Dickey-Fuller Test:
Test Statistic                  -19.282110
p-value                           0.000000
#Lags Used                       15.000000
Number of Observations Used    6020.000000
Critical Value (1%)              -3.431437
Critical Value (5%)              -2.862020
Critical Value (10%)             -2.567026
dtype: float64


# Examine Conditions

In [3]:
from dataLoader import *
real_data = load_dataset(data_mode='Test', output_size=10)




[*********************100%%**********************]  1 of 1 completed


X_train's shape is (4376, 1, 1, 3), X_test's shape is (485, 1, 1, 3)
y_train's label shape is (4376, 1, 1, 11), y_test's label shape is (485, 1, 1, 11)


In [4]:
real_data.df

Unnamed: 0_level_0,close,rates,logreturns,vix,100%60d,100%30d,rsi,rsi_z,vix_z,100%60d_z,100%30d_z
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2005-01-03,83.364357,2.32,-0.004727,14.08,12.9604,12.1911,62.356410,0.456564,-0.710134,1.201544,1.000550
2005-01-04,82.345688,2.33,-0.012295,13.98,13.0911,12.2949,43.495696,-0.495430,-0.761999,1.310677,1.081760
2005-01-05,81.777489,2.33,-0.006924,14.09,13.2074,12.5076,25.311277,-1.933521,-0.691566,1.406874,1.246050
2005-01-06,82.193245,2.31,0.005071,13.58,12.6558,11.9965,29.824358,-1.478711,-0.987285,0.942875,0.846420
2005-01-07,82.075432,2.32,-0.001434,13.49,12.5318,11.8700,27.638658,-1.670552,-1.035349,0.835785,0.744880
...,...,...,...,...,...,...,...,...,...,...,...
2024-05-03,511.290009,5.45,0.012319,13.49,12.6814,11.8588,70.228250,0.628805,-0.735900,-0.286706,-0.370131
2024-05-06,516.570007,5.45,0.010274,13.49,12.5238,11.8311,70.756353,0.642114,-0.729584,-0.384289,-0.382912
2024-05-07,517.140015,5.45,0.001103,13.23,12.3541,11.6032,66.307149,0.475388,-0.872822,-0.488945,-0.510131
2024-05-08,517.190002,5.45,0.000097,13.00,11.9472,11.2966,66.809355,0.491271,-0.999628,-0.755514,-0.689200


In [7]:
fig = px.scatter(real_data.df, x="100%60d_z", y="100%30d_z")
fig.show()

In [10]:
fig = px.scatter(real_data.df, x="100%60d_z", y="rsi_z")
fig.show()

In [11]:
fig = px.scatter_3d(real_data.df, x='100%60d_z', y='100%30d_z', z='rsi_z')
fig.show()

## Granger causality tests

In [4]:
from statsmodels.tsa.stattools import grangercausalitytests

def granger_causation_matrix(data, treatment_vars, outcome_vars=['close','logreturns'], test='ssr_chi2test', verbose=False, maxlag=42):    
    """Check Granger Causality of all possible combinations of the Time series.
    The rows are the response variable, columns are predictors. The values in the table 
    are the P-Values. P-Values lesser than the significance level (0.05), implies 
    the Null Hypothesis that the coefficients of the corresponding past values is 
    zero, that is, the X does not cause Y can be rejected.

    data      : pandas dataframe containing the time series variables
    variables : list containing names of the time series variables.
    """
    df_min = pd.DataFrame(np.zeros((len(outcome_vars), len(treatment_vars))), columns=treatment_vars, index=outcome_vars)
    df_mean = pd.DataFrame(np.zeros((len(outcome_vars), len(treatment_vars))), columns=treatment_vars, index=outcome_vars)
    for treatment in treatment_vars:
        for outcome in outcome_vars:
            test_result = grangercausalitytests(data[[outcome, treatment]].dropna(), maxlag=maxlag, verbose=False)
            p_values = [round(test_result[i+1][0][test][1],4) for i in range(maxlag)]
            if verbose: print(f'X = {treatment}, Y = {outcome}, P Values = {p_values}')
            min_p_value = np.min(p_values)
            mean_p_value = np.mean(p_values)
            df_min.loc[outcome, treatment] = min_p_value
            df_mean.loc[outcome, treatment] = mean_p_value
    df_min.columns, df_mean.columns = treatment_vars, treatment_vars
    df_min.index, df_mean.index =outcome_vars, outcome_vars

    print()
    print("Min p-values")
    display(df_min)
    print("Mean p-values")
    display(df_mean)

In [5]:
conditions = ['rsi','vix','ivol'] + real_data.condition_names
if 'vix_z' not in conditions: conditions += ['vix_z']
if 'ivol_z' not in conditions: conditions += ['ivol_z']
granger_causation_matrix(real_data.df, treatment_vars=conditions, verbose=True, maxlag=10)

X = rsi, Y = close, P Values = [0.1449, 0.5979, 0.7167, 0.7871, 0.8697, 0.8921, 0.8947, 0.3364, 0.4191, 0.134]
X = rsi, Y = logreturns, P Values = [0.796, 0.2042, 0.2803, 0.4512, 0.3358, 0.1231, 0.2659, 0.0469, 0.1235, 0.0982]
X = vix, Y = close, P Values = [0.1201, 0.0085, 0.0104, 0.0003, 0.0016, 0.002, 0.0032, 0.001, 0.0004, 0.0006]
X = vix, Y = logreturns, P Values = [0.4085, 0.0023, 0.0016, 0.0006, 0.0016, 0.0043, 0.0032, 0.002, 0.0, 0.0]
X = ivol, Y = close, P Values = [0.1787, 0.0008, 0.0003, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
X = ivol, Y = logreturns, P Values = [0.4841, 0.0003, 0.0003, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
X = rsi_z, Y = close, P Values = [0.079, 0.7804, 0.8106, 0.5727, 0.7465, 0.7758, 0.8807, 0.2294, 0.27, 0.1501]
X = rsi_z, Y = logreturns, P Values = [0.5469, 0.4403, 0.4901, 0.749, 0.513, 0.3852, 0.4566, 0.0289, 0.0586, 0.0668]
X = ivol_z, Y = close, P Values = [0.8949, 0.0004, 0.0014, 0.0037, 0.0046, 0.0073, 0.0043, 0.011, 0.0373, 0.0409]
X = ivol_z, Y = logret

Unnamed: 0,rsi,vix,ivol,rsi_z,ivol_z,vix_z
close,0.134,0.0003,0.0,0.079,0.0004,0.0002
logreturns,0.0469,0.0,0.0,0.0289,0.0,0.0


Mean p-values


Unnamed: 0,rsi,vix,ivol,rsi_z,ivol_z,vix_z
close,0.57926,0.01481,0.01798,0.52952,0.10058,0.08815
logreturns,0.27251,0.04241,0.04847,0.37354,0.01878,0.02012


`'rsi', 'vix', 'ivol'` are the strongest features. <br>
Hence, we choose these conditions: `['rsi_z', 'vix_z', 'ivol_z']`

## VIX vs IVol

In [8]:
plot_2axes(real_data.df, 'vix', 'ivol')

In [12]:
np.corrcoef(real_data.df.ivol, real_data.df.vix)

array([[1.        , 0.98844867],
       [0.98844867, 1.        ]])

In [9]:
np.corrcoef(real_data.df.ivol_z, real_data.df.vix_z)

array([[1.        , 0.95625822],
       [0.95625822, 1.        ]])

In [10]:
granger_causation_matrix(real_data.df, treatment_vars=['ivol_z','vix_z'], outcome_vars=['ivol_z','vix_z'], verbose=True)



X = ivol_z, Y = ivol_z, P Values = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]




X = ivol_z, Y = vix_z, P Values = [0.0806, 0.2716, 0.492, 0.5892, 0.6427, 0.5636, 0.6187, 0.4636, 0.5146, 0.2948, 0.21, 0.2859, 0.1615, 0.1777, 0.0354, 0.034, 0.0434, 0.0287, 0.0369, 0.0079, 0.0099, 0.0132, 0.0135, 0.0112, 0.0151, 0.0145, 0.0202, 0.0101, 0.0122, 0.015, 0.0177, 0.0161, 0.0229, 0.0225, 0.026, 0.02, 0.0165, 0.0192, 0.0207, 0.0261, 0.0264, 0.0396]




X = vix_z, Y = ivol_z, P Values = [0.8554, 0.0004, 0.0015, 0.0039, 0.0054, 0.0061, 0.0122, 0.0031, 0.006, 0.0026, 0.0047, 0.0077, 0.0093, 0.0097, 0.0057, 0.0072, 0.0063, 0.0001, 0.0002, 0.0001, 0.0001, 0.0001, 0.0002, 0.0001, 0.0002, 0.0003, 0.0005, 0.0003, 0.0003, 0.0004, 0.0006, 0.0005, 0.0006, 0.0006, 0.0009, 0.001, 0.0012, 0.0013, 0.0024, 0.0032, 0.0043, 0.0066]




X = vix_z, Y = vix_z, P Values = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

Min p-values


Unnamed: 0,ivol_z,vix_z
ivol_z,1.0,0.0001
vix_z,0.0079,1.0


Mean p-values


Unnamed: 0,ivol_z,vix_z
ivol_z,1.0,0.023174
vix_z,0.141938,1.0


In [11]:
granger_causation_matrix(real_data.df, treatment_vars=['ivol','vix'], outcome_vars=['ivol','vix'], verbose=True)



X = ivol, Y = ivol, P Values = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]




X = ivol, Y = vix, P Values = [0.0, 0.0063, 0.0005, 0.0001, 0.0006, 0.0008, 0.0022, 0.0003, 0.0001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]




X = vix, Y = ivol, P Values = [0.8907, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0001, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]




X = vix, Y = vix, P Values = [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0]

Min p-values


Unnamed: 0,ivol,vix
ivol,1.0,0.0
vix,0.0,1.0


Mean p-values


Unnamed: 0,ivol,vix
ivol,1.0,0.02121
vix,0.00026,1.0


## Check if normalised follow actual trend

In [12]:
plot_2axes(real_data.df, 'rsi', 'rsi_z')

In [8]:
plot_2axes(real_data.df, 'vix_z', 'logreturns')

In [4]:
plot_2axes(real_data.df, 'vix', 'vix_z')

In [14]:
plot_2axes(real_data.df, 'ivol', 'ivol_z')

In [15]:
plot_2axes(real_data.df, 'pc_ratio', 'pc_ratio_z')