# Resources Referenced:
- https://towardsdatascience.com/a-quick-introduction-on-granger-causality-testing-for-time-series-analysis-7113dc9420d2
- https://www.statology.org/granger-causality-test-in-python/
- https://www.machinelearningplus.com/time-series/granger-causality-test-in-python/
- https://rishi-a.github.io/2020/05/25/granger-causality.html

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the two raw inputs, each from its own CSV file:
#   dfCrypto - cryptocurrency dataframe with dates and price
#   dfNASDAQ - NASDAQ dataframe with dates and average
dfCrypto, dfNASDAQ = (
    pd.read_csv(csv_path)
    for csv_path in ("CleanedMergedCryptoSet.csv", "NASDAQ_HistData.csv")
)

In [3]:
# Convert the 'Date' column of both frames to datetime values so they can be merged on it
for frame in (dfCrypto, dfNASDAQ):
    frame['Date'] = pd.to_datetime(frame['Date'])

In [4]:
# Combine the two frames on 'Date'; the outer join keeps dates that appear in either frame
dfMerged = dfCrypto.merge(dfNASDAQ, on='Date', how='outer')

In [5]:
# Use 'Date' as the row index and label the column axis 'Price/Average'
dfMerged = (
    dfMerged
    .set_index('Date')
    .rename_axis('Price/Average', axis='columns')
)

In [6]:
# Keep only the rows that have no missing value in any column
dfMerged = dfMerged.dropna(axis='index', how='any')

In [7]:
# Give the NASDAQ column a self-describing name: 'Average' -> 'NASDAQ_Average'
dfMerged = dfMerged.rename({'Average': 'NASDAQ_Average'}, axis='columns')

In [8]:
# Row counts noted by the author: 2546 rows in the cryptocurrency dataset and 1860 rows in the NASDAQ dataset.
# After dropping NA rows (dates covered by only one of the two datasets), 1,280 rows remain.
# The bare expression below displays the merged frame as the cell output.
dfMerged

Price/Average,BNB_price,XRP_price,LTC_price,ETH_price,BTC_price,NASDAQ_Average
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-09-18,0.173491,0.189366,53.671766,287.961096,4012.285825,6458.090
2017-09-19,0.168334,0.183936,51.949089,280.445632,3898.934732,6457.270
2017-09-20,0.166628,0.182440,50.277911,279.417649,3858.020951,6440.140
2017-09-21,0.095817,0.171347,46.308421,257.465075,3628.058588,6426.935
2017-09-22,0.094213,0.173483,47.667821,263.469774,3622.183039,6415.175
...,...,...,...,...,...,...
2022-10-11,271.885899,0.497324,52.734480,1291.081660,19142.694770,10480.410
2022-10-12,271.366431,0.485334,52.027246,1279.883308,19058.628040,10433.370
2022-10-13,270.923086,0.488039,52.198877,1294.271051,19153.039780,10393.270
2022-10-14,271.808170,0.480833,51.217335,1288.164015,19383.943220,10537.750


# ADF-Test

In [9]:
# Hold out the final n_obs rows as the test set; every row before them forms the training set
n_obs = 20
dfTrain = dfMerged[:-n_obs]
dfTest = dfMerged[-n_obs:]

from statsmodels.tsa.stattools import adfuller

def adf_test(df):
    """Run the Augmented Dickey-Fuller test on one series and print a report.

    Parameters
    ----------
    df : object exposing ``.values``
        The series to test. The notebook passes a single DataFrame column;
        its ``.values`` are handed to ``adfuller``.

    Returns
    -------
    None
        The test statistic, the p-value and the critical values are printed,
        not returned.
    """
    outcome = adfuller(df.values)
    # Positions 0 and 1 of the adfuller result are reported as statistic and p-value.
    for template, position in (('ADF Statistics: %f', 0), ('p-value: %f', 1)):
        print(template % outcome[position])
    print('Critical values:')
    # Position 4 is a mapping of significance level -> critical value.
    for level, threshold in outcome[4].items():
        print('\t%s: %.3f' % (level, threshold))
        
# Run the ADF test on every training series, printing a short label before each report
adf_targets = {
    'BNB': 'BNB_price',
    'XRP': 'XRP_price',
    'LTC': 'LTC_price',
    'ETH': 'ETH_price',
    'BTC': 'BTC_price',
    'NASDAQ': 'NASDAQ_Average',
}
for label, column in adf_targets.items():
    print(label)
    adf_test(dfTrain[column])

BNB
ADF Statistics: -1.789976
p-value: 0.385449
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
XRP
ADF Statistics: -3.823326
p-value: 0.002677
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
LTC
ADF Statistics: -2.772021
p-value: 0.062383
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
ETH
ADF Statistics: -1.703856
p-value: 0.429127
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
BTC
ADF Statistics: -1.729684
p-value: 0.415887
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
NASDAQ
ADF Statistics: -1.306811
p-value: 0.626050
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568


Based on the ADF test results above (null hypothesis: the series has a unit root, i.e. it is non-stationary), the series are classified as follows at the 5% significance level:

Stationary:
- XRP

Non-stationary:
- BNB
- LTC
- ETH
- BTC
- NASDAQ

# KPSS-test

In [10]:
from statsmodels.tsa.stattools import kpss

def kpss_test(dfMerged):
    """Run the KPSS test on one series and print a report.

    Parameters
    ----------
    dfMerged : object exposing ``.values``
        The series to test. Despite its name, the notebook passes a single
        DataFrame column here; inside this function the parameter shadows the
        notebook-level frame of the same name. Its ``.values`` are handed to
        ``kpss``.

    Returns
    -------
    None
        The statistic, the p-value, the number of lags and the critical
        values are printed, not returned.
    """
    statistic, p_value, n_lags, critical_values = kpss(dfMerged.values)

    print(f'KPSS Statistic: {statistic}')
    print(f'p-value: {p_value}')
    print(f'num lags: {n_lags}')
    # Header spelling corrected ("Critial" -> "Critical") to match the ADF report.
    print('Critical Values:')
    for key, value in critical_values.items():
        print(f'   {key} : {value}')
        
# Run the KPSS test on every training series, printing a short label before each report
kpss_targets = {
    'BNB': 'BNB_price',
    'XRP': 'XRP_price',
    'LTC': 'LTC_price',
    'ETH': 'ETH_price',
    'BTC': 'BTC_price',
    'NASDAQ': 'NASDAQ_Average',
}
for label, column in kpss_targets.items():
    print(label)
    kpss_test(dfTrain[column])

BNB
KPSS Statistic: 3.542810933268183
p-value: 0.01
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
XRP
KPSS Statistic: 0.6485178686618407
p-value: 0.018225648303469025
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
LTC
KPSS Statistic: 0.6033102629417053
p-value: 0.02233543064166315
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
ETH
KPSS Statistic: 3.1430455231308247
p-value: 0.01
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
BTC
KPSS Statistic: 3.3664195474239063
p-value: 0.01
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
NASDAQ
KPSS Statistic: 4.60851820482573
p-value: 0.01
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739


look-up table. The actual p-value is smaller than the p-value returned.

look-up table. The actual p-value is smaller than the p-value returned.

look-up table. The actual p-value is smaller than the p-value returned.

look-up table. The actual p-value is smaller than the p-value returned.



Based on the KPSS test results above (null hypothesis: the series is stationary), every p-value is below 0.05, so all series are classified as non-stationary at the 5% significance level:

Non-stationary:
- XRP
- BNB
- LTC
- ETH
- BTC
- NASDAQ

# Cross-Referencing ADF-test and KPSS-test
The ADF test classified every series except XRP as non-stationary, whereas the KPSS test classified all series, including XRP, as non-stationary. Because the two tests disagree on XRP, we treat all six series as non-stationary.

# Difference Method
To make the data stationary, we apply first-order differencing: each value is replaced by its change from the previous observation.

In [11]:
# First-order difference of the training data; the rows left with NaN by the
# differencing (any row containing a missing value) are dropped.
dfTrainDiff = dfTrain.diff(periods=1).dropna(axis='index', how='any')

# Re-Performing ADF-test and KPSS-test

In [12]:
# Repeat the ADF test on every differenced training series
adf_diff_targets = {
    'BNB': 'BNB_price',
    'XRP': 'XRP_price',
    'LTC': 'LTC_price',
    'ETH': 'ETH_price',
    'BTC': 'BTC_price',
    'NASDAQ': 'NASDAQ_Average',
}
for label, column in adf_diff_targets.items():
    print(label)
    adf_test(dfTrainDiff[column])

BNB
ADF Statistics: -6.210330
p-value: 0.000000
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
XRP
ADF Statistics: -10.682074
p-value: 0.000000
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
LTC
ADF Statistics: -11.259681
p-value: 0.000000
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
ETH
ADF Statistics: -10.650479
p-value: 0.000000
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
BTC
ADF Statistics: -6.019321
p-value: 0.000000
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568
NASDAQ
ADF Statistics: -23.372049
p-value: 0.000000
Critical values:
	1%: -3.436
	5%: -2.864
	10%: -2.568


In [13]:
# Repeat the KPSS test on every differenced training series
kpss_diff_targets = {
    'BNB': 'BNB_price',
    'XRP': 'XRP_price',
    'LTC': 'LTC_price',
    'ETH': 'ETH_price',
    'BTC': 'BTC_price',
    'NASDAQ': 'NASDAQ_Average',
}
for label, column in kpss_diff_targets.items():
    print(label)
    kpss_test(dfTrainDiff[column])

BNB
KPSS Statistic: 0.0623878470162051
p-value: 0.1
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
XRP
KPSS Statistic: 0.030166074686804323
p-value: 0.1
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
LTC
KPSS Statistic: 0.05282065526373191
p-value: 0.1
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
ETH
KPSS Statistic: 0.10094906169675083
p-value: 0.1
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
BTC
KPSS Statistic: 0.12718897885610153
p-value: 0.1
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739
NASDAQ
KPSS Statistic: 0.17830618323303368
p-value: 0.1
num lags: 23
Critial Values:
   10% : 0.347
   5% : 0.463
   2.5% : 0.574
   1% : 0.739


look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.

look-up table. The actual p-value is greater than the p-value returned.



# Re-Performance Results
- For the ADF test, all p-values are below 0.05, so we reject the unit-root null hypothesis: the differenced series are stationary.
- For the KPSS test, all p-values are above 0.05, so we fail to reject the null hypothesis of stationarity: the differenced series are stationary.

Both tests now indicate that the differenced data is stationary.



In [14]:
from statsmodels.tsa.api import VAR

# Fit a VAR on the differenced training data for lag orders 1 through 15 and
# print the information criteria of each fit so the lag orders can be compared.
model = VAR(dfTrainDiff)
for lag_order in range(1, 16):
    result = model.fit(lag_order)
    print('Lag Order =', lag_order)
    # Same label/value pairs as before, looked up by attribute name one at a time.
    for label, attribute in (('AIC : ', 'aic'), ('BIC : ', 'bic'), ('FPE : ', 'fpe')):
        print(label, getattr(result, attribute))
    # HQIC is printed last, followed by a blank separator line.
    print('HQIC: ', result.hqic, '\n')



Lag Order = 1
AIC :  34.63209521325939
BIC :  34.80361007364487
FPE :  1097812438556535.1
HQIC:  34.696552962642976 

Lag Order = 2
AIC :  34.57451852530859
BIC :  34.89325018025691
FPE :  1036393144941407.9
HQIC:  34.69430717822998 

Lag Order = 3
AIC :  34.571483262578724
BIC :  35.03762048993637
FPE :  1033261925879775.8
HQIC:  34.746678132349196 

Lag Order = 4
AIC :  34.54695943476322
BIC :  35.16069140905395
FPE :  1008248499385529.1
HQIC:  34.77763599728201 

Lag Order = 5
AIC :  34.53889523689647
BIC :  35.300411530470534
FPE :  1000179310209477.6
HQIC:  34.82512913112935 

Lag Order = 6
AIC :  34.502923767114105
BIC :  35.41241435130159
FPE :  964881889077420.5
HQIC:  34.84479079557371 

Lag Order = 7
AIC :  34.50237258061006
BIC :  35.56002782687866
FPE :  964407217989788.8
HQIC:  34.8999487098381 

Lag Order = 8
AIC :  34.47591246979267
BIC :  35.68192315090959
FPE :  939297206690474.4
HQIC:  34.92927383084386 

Lag Order = 9
AIC :  34.497204578967
BIC :  35.851761870165234


In [23]:
from statsmodels.tsa.stattools import grangercausalitytests

# Largest lag order tried for every variable pair. grangers_causation_matrix (defined below)
# reads this name as a module-level global rather than receiving it as an argument.
# NOTE(review): that function keeps the minimum p-value over all lags 1..maxlag, so with 100 lags
# the reported values are presumably biased toward small p-values (multiple comparisons) —
# confirm that such a wide lag range is intended.
maxlag=100
# Key of the test whose p-value is taken from each grangercausalitytests result.
# The function's own `test` parameter has the same default, so it does not read this module-level name.
test = 'ssr_chi2test'

def grangers_causation_matrix(data, variables, test='ssr_chi2test', verbose=False, max_lag=None):
    """Build a matrix of minimum Granger-causality p-values for every variable pair.

    For each ordered pair (row ``r``, column ``c``) the statsmodels
    ``grangercausalitytests`` function is run on ``data[[r, c]]`` (response
    ``r`` first, candidate predictor ``c`` second) for lags ``1..max_lag``.
    The p-value of ``test`` is rounded to 4 decimals per lag, and the smallest
    of them is stored in the cell.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame holding one column per entry of ``variables``.
    variables : iterable of str
        Column names to test against each other.
    test : str, default 'ssr_chi2test'
        Key of the test result to read the p-value from.
    verbose : bool, default False
        When true, print the per-lag p-values for every pair.
    max_lag : int, optional
        Largest lag order to test. When omitted, the module-level ``maxlag``
        is used, which keeps existing calls working unchanged.

    Returns
    -------
    pandas.DataFrame
        Square frame whose rows are suffixed ``_y`` (response) and whose
        columns are suffixed ``_x`` (predictor). Diagonal cells are 1.0.

    Notes
    -----
    The cell value is the minimum p-value over all tested lags; the more lags
    are tried, the more likely a small value appears by chance, so read the
    matrix with that in mind.
    """
    # Prefer the explicit argument; fall back to the notebook-level setting.
    lags = maxlag if max_lag is None else max_lag
    names = list(variables)

    matrix = pd.DataFrame(np.zeros((len(names), len(names))), columns=names, index=names)
    for c in names:
        for r in names:
            if r == c:
                # Testing a series against itself is degenerate; record the
                # "no evidence" p-value directly instead of fitting `lags` regressions.
                matrix.loc[r, c] = 1.0
                continue
            # verbose=False keeps statsmodels from printing its own report for every lag.
            test_result = grangercausalitytests(data[[r, c]], maxlag=lags, verbose=False)
            # The result is keyed by lag; [0] is the dict of tests, [test][1] the p-value.
            p_values = [round(test_result[lag][0][test][1], 4) for lag in range(1, lags + 1)]
            if verbose:
                print(f'Y = {r}, X = {c}, P Values = {p_values}')
            matrix.loc[r, c] = np.min(p_values)

    matrix.columns = [name + '_x' for name in names]
    matrix.index = [name + '_y' for name in names]
    return matrix

# Display the pairwise Granger-causality p-value matrix for all differenced training series
grangers_causation_matrix(data=dfTrainDiff, variables=dfTrainDiff.columns)


Unnamed: 0,BNB_price_x,XRP_price_x,LTC_price_x,ETH_price_x,BTC_price_x,NASDAQ_Average_x
BNB_price_y,1.0,0.0,0.0,0.0,0.0,0.0
XRP_price_y,0.0,1.0,0.0,0.0,0.0,0.0
LTC_price_y,0.0,0.0,1.0,0.0,0.0,0.0
ETH_price_y,0.0,0.0,0.0,1.0,0.0,0.0
BTC_price_y,0.0,0.0,0.0,0.0,1.0,0.0
NASDAQ_Average_y,0.0001,0.5964,0.3519,0.0,0.0,1.0
