In [69]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
from statsmodels.tsa.vector_ar.var_model import VAR
from statsmodels.tsa.stattools import adfuller, kpss, grangercausalitytests
import warnings
warnings.filterwarnings("ignore")

In [70]:
post_df = pd.read_csv('./out_data/posts output folder/first_clean_posts_18_09_2024.csv') 
comb_5min_intra = pd.read_csv('./out_data/Stock output folder/combined_5min_stockdata.csv')
GME_daily = pd.read_csv('./out_data/Stock output folder/GME_daily_stockdata.csv')
IWM_daily = pd.read_csv('./out_data/Stock output folder/IWM_daily_stockdata.csv')
ODP_daily = pd.read_csv('./out_data/Stock output folder/ODP_daily_stockdata.csv')
BBY_daily = pd.read_csv('./out_data/Stock output folder/BBY_daily_stockdata.csv')
FCFS_daily = pd.read_csv('./out_data/Stock output folder/FCFS_daily_stockdata.csv')

In [71]:
#aggregating at daily level posts activity 
post_ts_df = post_df[['num_comments','score','created_et_date']]
post_ts_df['created_et_date'] = pd.to_datetime(post_ts_df['created_et_date'])
post_ts_df=post_ts_df[post_ts_df['created_et_date'] >= '2020-12-15']
post_ts_df.head()

Unnamed: 0,num_comments,score,created_et_date
475,0,1,2020-12-15
476,0,1,2020-12-15
477,0,1,2020-12-15
478,0,1,2020-12-15
479,0,1,2020-12-15


In [72]:
post_daily = post_ts_df.groupby("created_et_date").agg(sum_comments=("num_comments", "sum"), max_score=("score", "max")).reset_index()
post_daily.head()

Unnamed: 0,created_et_date,sum_comments,max_score
0,2020-12-15,56000,34925
1,2020-12-16,55200,30176
2,2020-12-17,58486,28735
3,2020-12-18,79571,27521
4,2020-12-19,11432,20093


In [73]:
#merge datasets
datasets = {
    "GME": GME_daily,
    "IWM": IWM_daily,
    "ODP": ODP_daily,
    "BBY": BBY_daily,
    "FCFS": FCFS_daily
}

combined_daily_df = None

for name, df in datasets.items():
    df.rename(columns={"Close": f"{name}_close", "Return": f"{name}_return"}, inplace=True)

    if combined_daily_df is None:
        combined_daily_df = df  
    else:
        combined_daily_df = combined_daily_df.merge(df, on="DateTime", how="outer")

In [74]:
combined_daily_df

Unnamed: 0,DateTime,GME_close,GME_return,IWM_close,IWM_return,ODP_close,ODP_return,BBY_close,BBY_return,FCFS_close,FCFS_return
0,2020-12-15,3.4625,,185.0523,,28.04,,88.6052,,70.3188,
1,2020-12-16,3.4625,0.0,184.3869,-0.003596,27.33,-0.025321,89.044,0.004952,68.9738,-0.019127
2,2020-12-17,3.7075,0.070758,186.5827,0.011909,28.35,0.037322,88.046,-0.011208,69.5516,0.008377
3,2020-12-18,3.9075,0.053945,185.6797,-0.00484,28.52,0.005996,87.7105,-0.003811,68.5476,-0.014435
4,2020-12-21,3.8825,-0.006398,185.5846,-0.000512,28.73,0.007363,88.8719,0.013241,66.6722,-0.027359
5,2020-12-22,4.865,0.253059,187.6948,0.011371,28.21,-0.0181,89.586,0.008035,65.6966,-0.014633
6,2020-12-23,5.1425,0.05704,189.2917,0.008508,28.51,0.010635,88.046,-0.01719,65.1851,-0.007786
7,2020-12-24,5.0375,-0.020418,189.1682,-0.000652,28.63,0.004209,88.2267,0.002052,66.3501,0.017872
8,2020-12-28,5.2475,0.041687,188.4648,-0.003718,29.69,0.037024,88.2525,0.000292,66.9184,0.008565
9,2020-12-29,4.845,-0.076703,184.8147,-0.019368,29.21,-0.016167,86.6867,-0.017742,64.3895,-0.037791


In [75]:
#merge with post data now 
combined_daily_df['DateTime'] = pd.to_datetime(combined_daily_df['DateTime'])
post_daily['created_et_date'] = pd.to_datetime(post_daily['created_et_date'])

# Perform a left merge
ts_df = combined_daily_df.merge(post_daily, left_on='DateTime', right_on='created_et_date', how='left')


In [76]:
#ts_df=ts_df[ts_df['created_et_date'] >= '2021-01-01']
ts_df = ts_df.drop(columns=['created_et_date'])
ts_df.set_index('DateTime', inplace=True)

In [77]:
ts_df

Unnamed: 0_level_0,GME_close,GME_return,IWM_close,IWM_return,ODP_close,ODP_return,BBY_close,BBY_return,FCFS_close,FCFS_return,sum_comments,max_score
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2020-12-15,3.4625,,185.0523,,28.04,,88.6052,,70.3188,,56000,34925
2020-12-16,3.4625,0.0,184.3869,-0.003596,27.33,-0.025321,89.044,0.004952,68.9738,-0.019127,55200,30176
2020-12-17,3.7075,0.070758,186.5827,0.011909,28.35,0.037322,88.046,-0.011208,69.5516,0.008377,58486,28735
2020-12-18,3.9075,0.053945,185.6797,-0.00484,28.52,0.005996,87.7105,-0.003811,68.5476,-0.014435,79571,27521
2020-12-21,3.8825,-0.006398,185.5846,-0.000512,28.73,0.007363,88.8719,0.013241,66.6722,-0.027359,63608,31122
2020-12-22,4.865,0.253059,187.6948,0.011371,28.21,-0.0181,89.586,0.008035,65.6966,-0.014633,69434,63773
2020-12-23,5.1425,0.05704,189.2917,0.008508,28.51,0.010635,88.046,-0.01719,65.1851,-0.007786,57609,24548
2020-12-24,5.0375,-0.020418,189.1682,-0.000652,28.63,0.004209,88.2267,0.002052,66.3501,0.017872,38770,15892
2020-12-28,5.2475,0.041687,188.4648,-0.003718,29.69,0.037024,88.2525,0.000292,66.9184,0.008565,55811,17616
2020-12-29,4.845,-0.076703,184.8147,-0.019368,29.21,-0.016167,86.6867,-0.017742,64.3895,-0.037791,58417,14890


In [78]:
#kpss test
def kpss_test(data_df):
    test_stat, p_val = [], []
    cv_5pct, cv_10pct = [], []
    for c in data_df.columns: 
        kpss_res = kpss(data_df[c].dropna(), regression='ct')
        test_stat.append(kpss_res[0])
        p_val.append(kpss_res[1])
        cv_5pct.append(kpss_res[3]['5%'])
        cv_10pct.append(kpss_res[3]['10%'])
    kpss_res_df = pd.DataFrame({'Test statistic': test_stat, 
                               'p-value': p_val, 
                               'Critical value - 5%': cv_5pct,
                               'Critical value - 10%': cv_10pct}, 
                             index=data_df.columns).T
    kpss_res_df = kpss_res_df.round(4)
    return kpss_res_df

In [79]:
kpss_daily_results = kpss_test(ts_df)
kpss_daily_results

Unnamed: 0,GME_close,GME_return,IWM_close,IWM_return,ODP_close,ODP_return,BBY_close,BBY_return,FCFS_close,FCFS_return,sum_comments,max_score
Test statistic,0.0823,0.111,0.061,0.0552,0.1678,0.1363,0.0709,0.0758,0.2175,0.1173,0.0891,0.0838
p-value,0.1,0.1,0.1,0.1,0.0318,0.068,0.1,0.1,0.01,0.1,0.1,0.1
Critical value - 5%,0.146,0.146,0.146,0.146,0.146,0.146,0.146,0.146,0.146,0.146,0.146,0.146
Critical value - 10%,0.119,0.119,0.119,0.119,0.119,0.119,0.119,0.119,0.119,0.119,0.119,0.119


In [80]:
#adf test 
def adf_test(data_df):
    test_stat, p_val = [], []
    cv_5pct, cv_10pct = [], []
    for c in data_df.columns: 
        adf_res = adfuller(data_df[c].dropna())
        test_stat.append(adf_res[0])
        p_val.append(adf_res[1])
        cv_5pct.append(adf_res[4]['5%'])
        cv_10pct.append(adf_res[4]['10%'])
    adf_res_df = pd.DataFrame({'Test statistic': test_stat, 
                               'p-value': p_val, 
                               'Critical value - 5%': cv_5pct,
                               'Critical value - 10%': cv_10pct}, 
                             index=data_df.columns).T
    adf_res_df = adf_res_df.round(4)
    return adf_res_df

In [81]:
adf_daily_results = adf_test(ts_df)
adf_daily_results

Unnamed: 0,GME_close,GME_return,IWM_close,IWM_return,ODP_close,ODP_return,BBY_close,BBY_return,FCFS_close,FCFS_return,sum_comments,max_score
Test statistic,-2.6899,-3.4203,-0.3219,-5.6514,-1.2678,-5.0411,-0.6063,-4.9057,-1.6574,-2.441,-2.5247,-1.4966
p-value,0.0758,0.0103,0.9223,0.0,0.6438,0.0,0.8696,0.0,0.4532,0.1305,0.1096,0.5352
Critical value - 5%,-2.9435,-2.946,-2.9371,-2.9391,-2.9371,-2.9391,-2.9391,-2.9391,-2.946,-2.9435,-2.9391,-2.9371
Critical value - 10%,-2.6104,-2.6117,-2.607,-2.6081,-2.607,-2.6081,-2.6081,-2.6081,-2.6117,-2.6104,-2.6081,-2.607


In [82]:
#making non stationary series stationary
#all close prices are non stationary, GME_close is weakly stationary
#all returns are stationary
#sum_comment sis weakly non stationary
#max_score is non stationary

#using first differencing for all non stationary series
for col in ts_df.columns:
    if 'close' in col:
        ts_df[col] = ts_df[col] - ts_df[col].shift(1)

ts_df['max_score'] = ts_df['max_score'] - ts_df['max_score'].shift(1)
ts_df['sum_comments'] = ts_df['sum_comments'] - ts_df['sum_comments'].shift(1)

In [83]:
ts_df = ts_df.dropna()

In [84]:
def significance_stars(p_value):
    """Return significance stars based on p-value."""
    if p_value < 0.01:
        return '***'  # 1% level
    elif p_value < 0.05:
        return '**'   # 5% level
    elif p_value < 0.10:
        return '*'    # 10% level
    else:
        return ''     # Not significant


def granger_causation_matrix(data, variables, p, test='ssr_chi2test', verbose=False):
    """Check Granger Causality of all possible combinations of the time series.
    The rows are the response variables, columns are predictors. The values in the table 
    are the P-Values with significance stars.
    
    data      : pandas dataframe containing the time series variables
    variables : list containing names of the time series variables.
    """
    df = pd.DataFrame(np.zeros((len(variables), len(variables))), columns=variables, index=variables)
    for c in df.columns:
        for r in df.index:
            test_result = grangercausalitytests(data[[r, c]], p, verbose=False)
            p_values = [round(test_result[i+1][0][test][1], 3) for i in range(p)]
            if verbose: 
                print(f'Y = {r}, X = {c}, P Values = {p_values}')
            min_p_value = np.min(p_values)
            significance = significance_stars(min_p_value)  # Get significance stars
            df.loc[r, c] = f'{min_p_value} {significance}'  # Combine p-value with stars
            
    df.columns = [var + '_x' for var in variables]
    df.index = [var + '_y' for var in variables]
    return df


In [86]:
#doing the same for 5 min data 
post_5ts_df = post_df[['num_comments','score','created_et_timestamp','created_et_date']]

In [87]:
post_5ts_df['created_et_date'] = pd.to_datetime(post_5ts_df['created_et_date'])
post_5ts_df=post_5ts_df[post_5ts_df['created_et_date'] >= '2020-12-15']
post_5ts_df.head()

Unnamed: 0,num_comments,score,created_et_timestamp,created_et_date
475,0,1,2020-12-15 00:10:43-05:00,2020-12-15
476,0,1,2020-12-15 01:29:46-05:00,2020-12-15
477,0,1,2020-12-15 01:41:16-05:00,2020-12-15
478,0,1,2020-12-15 01:41:23-05:00,2020-12-15
479,0,1,2020-12-15 02:22:45-05:00,2020-12-15


In [88]:
post_5ts_df['created_et_timestamp'] = pd.to_datetime(post_5ts_df['created_et_timestamp']).dt.tz_localize(None)

post_5ts_df.set_index('created_et_timestamp', inplace=True)
post_5ts_df = post_5ts_df.resample('5T').agg({
    'score': 'max',                 # Maximum score
    'num_comments': 'sum',          # Sum of comments
    'created_et_date': 'first',     # First created_et_date
})
post_5ts_df=post_5ts_df.reset_index()

In [89]:
comb_5min_intra.head()
comb_5min_intra = comb_5min_intra.drop(['SPX_Close', 'SPX_5min_return'], axis=1)

In [90]:
post_5ts_df.tail()

Unnamed: 0,created_et_timestamp,score,num_comments,created_et_date
18175,2021-02-16 02:35:00,1.0,4,2021-02-16
18176,2021-02-16 02:40:00,344.0,56,2021-02-16
18177,2021-02-16 02:45:00,1.0,4,2021-02-16
18178,2021-02-16 02:50:00,1.0,2,2021-02-16
18179,2021-02-16 02:55:00,1.0,2,2021-02-16


In [91]:
comb_5min_intra['DateTime'] = pd.to_datetime(comb_5min_intra['DateTime'])
final_5ts_df = comb_5min_intra.merge(post_5ts_df, left_on='DateTime', right_on='created_et_timestamp', how='left')


In [92]:
final_5ts_df = final_5ts_df.drop(columns=['created_et_date','created_et_timestamp'])
final_5ts_df.set_index('DateTime', inplace=True)

In [93]:
final_5ts_df.tail()

Unnamed: 0_level_0,BBY_Close,FCFS_Close,IWM_Close,ODP_Close,GME_Close,BBY_5min_return,FCFS_5min_return,IWM_5min_return,ODP_5min_return,GME_5min_return,score,num_comments
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2021-02-12 15:40:00,103.5835,59.2558,215.4888,44.2,12.8925,0.000748,-0.000482,-4.4e-05,-0.001581,0.000287,1448.0,194
2021-02-12 15:45:00,103.4975,59.1702,215.7027,44.25,12.98,-0.00083,-0.001445,0.000993,0.001131,0.006787,4.0,84
2021-02-12 15:50:00,103.5147,59.1702,216.0306,44.18,13.0625,0.000166,0.0,0.00152,-0.001582,0.006356,27.0,62
2021-02-12 15:55:00,103.704,59.5032,216.0211,44.24,13.0825,0.001829,0.005628,-4.4e-05,0.001358,0.001531,8.0,94
2021-02-12 16:00:00,103.6782,59.5032,216.1637,44.25,12.95,-0.000249,0.0,0.00066,0.000226,-0.010128,3471.0,59609


In [94]:
kpss_5min_results = kpss_test(final_5ts_df)
kpss_5min_results

Unnamed: 0,BBY_Close,FCFS_Close,IWM_Close,ODP_Close,GME_Close,BBY_5min_return,FCFS_5min_return,IWM_5min_return,ODP_5min_return,GME_5min_return,score,num_comments
Test statistic,0.5287,1.842,0.4028,1.4494,0.5425,0.0835,0.1201,0.0456,0.1311,0.1538,0.5874,0.4769
p-value,0.01,0.01,0.01,0.01,0.01,0.1,0.098,0.1,0.0776,0.0435,0.01,0.01
Critical value - 5%,0.146,0.146,0.146,0.146,0.146,0.146,0.146,0.146,0.146,0.146,0.146,0.146
Critical value - 10%,0.119,0.119,0.119,0.119,0.119,0.119,0.119,0.119,0.119,0.119,0.119,0.119


In [95]:
adf_5min_results = adf_test(final_5ts_df)
adf_5min_results

Unnamed: 0,BBY_Close,FCFS_Close,IWM_Close,ODP_Close,GME_Close,BBY_5min_return,FCFS_5min_return,IWM_5min_return,ODP_5min_return,GME_5min_return,score,num_comments
Test statistic,-0.5039,-1.5025,-0.7249,-1.3587,-1.8203,-39.4059,-44.33,-57.654,-19.9318,-12.7461,-3.3055,-5.7685
p-value,0.8913,0.5323,0.8402,0.6019,0.3704,0.0,0.0,0.0,0.0,0.0,0.0146,0.0
Critical value - 5%,-2.8624,-2.8624,-2.8624,-2.8624,-2.8624,-2.8624,-2.8624,-2.8624,-2.8624,-2.8624,-2.8624,-2.8624
Critical value - 10%,-2.5672,-2.5672,-2.5672,-2.5672,-2.5672,-2.5672,-2.5672,-2.5672,-2.5672,-2.5672,-2.5672,-2.5672


In [96]:
# all close price non stationary 
#score comment non stationary
for col in final_5ts_df.columns:
    if 'close' in col:
        final_5ts_df[col] = final_5ts_df[col] - final_5ts_df[col].shift(1)

final_5ts_df['score'] = final_5ts_df['score'] - final_5ts_df['score'].shift(1)
final_5ts_df['num_comments'] = final_5ts_df['num_comments'] - final_5ts_df['num_comments'].shift(1)


In [135]:
final_5ts_df = final_5ts_df.dropna()
result_5min = granger_causation_matrix(final_5ts_df, final_5ts_df.columns, 5)  
#result_5min.iloc[5:-2, -2:]
result_5min

Unnamed: 0,BBY_Close_x,FCFS_Close_x,IWM_Close_x,ODP_Close_x,GME_Close_x,BBY_5min_return_x,FCFS_5min_return_x,IWM_5min_return_x,ODP_5min_return_x,GME_5min_return_x,score_x,num_comments_x
BBY_Close_y,,0.3335,0.0022 ***,0.0061 ***,0.042 **,0.3147,0.5101,0.6342,0.4618,0.8717,0.5431,0.0312 **
FCFS_Close_y,0.7762,,0.046 **,0.3881,0.3173,0.6864,0.0 ***,0.0447 **,0.0266 **,0.2816,0.5624,0.0205 **
IWM_Close_y,0.9245,0.1409,,0.1374,0.7598,0.6839,0.1445,0.4848,0.0005 ***,0.9788,0.4622,0.025 **
ODP_Close_y,0.2245,0.0572 *,0.6789,,0.0848 *,0.971,0.6447,0.1241,0.0 ***,0.0032 ***,0.099 *,0.0006 ***
GME_Close_y,0.0086 ***,0.0365 **,0.0294 **,0.1957,,0.0211 **,0.0126 **,0.0064 ***,0.1245,0.0006 ***,0.003 ***,0.0002 ***
BBY_5min_return_y,0.5631,0.5818,0.6472,0.5321,0.0464 **,,0.4588,0.2738,0.2666,0.9418,0.5959,0.0222 **
FCFS_5min_return_y,0.3586,0.1727,0.2793,0.4261,0.2927,0.442,,0.0123 **,0.5015,0.1647,0.4625,0.0082 ***
IWM_5min_return_y,0.3586,0.2588,0.3567,0.8858,0.6436,0.4303,0.0875 *,,0.3329,0.959,0.4768,0.018 **
ODP_5min_return_y,0.0334 **,0.7198,0.0157 **,0.0989 *,0.0514 *,0.3024,0.546,0.0194 **,,0.0741 *,0.221,0.0009 ***
GME_5min_return_y,0.2157,0.0511 *,0.8479,0.3873,0.021 **,0.0793 *,0.0178 **,0.0351 **,0.4666,,0.0658 *,0.0 ***


In [98]:
from statsmodels.tsa.api import VAR
from statsmodels.tsa.stattools import grangercausalitytests

def significance_stars(p_value):
    """Return significance stars based on p-value."""
    if p_value < 0.01:
        return '***'  # 1% level
    elif p_value < 0.05:
        return '**'   # 5% level
    elif p_value < 0.10:
        return '*'    # 10% level
    else:
        return ''     # Not significant

def granger_causation_matrix(data, variables, max_lag=10, test='ssr_chi2test', ic='bic', verbose=False):
    """
    Compute Granger Causality Matrix with optimal lag selection using information criterion.

    Parameters:
    - data: pandas DataFrame of time series
    - variables: list of column names
    - max_lag: maximum lag to consider
    - test: Granger test type (default: 'ssr_chi2test')
    - ic: information criterion to select lag ('aic', 'bic', etc.)
    - verbose: print internal p-values
    """
    df = pd.DataFrame(index=variables, columns=variables)

    for r in variables:  # response
        for c in variables:  # predictor
            if r == c:
                df.loc[r, c] = ''
                continue

            sub_df = data[[r, c]].dropna()

            # Determine optimal lag using VAR
            try:
                model = VAR(sub_df)
                selected_lag = model.select_order(max_lag).selected_orders[ic]
                selected_lag = max(1, selected_lag)  # Ensure at least 1 lag
            except Exception as e:
                if verbose:
                    print(f"Lag selection failed for ({r}, {c}): {e}")
                selected_lag = 1  # Fallback to lag 1 if selection fails

            # Run Granger causality test
            try:
                test_result = grangercausalitytests(sub_df, maxlag=selected_lag, verbose=False)
                p_value = test_result[selected_lag][0][test][1]
                formatted_p = round(p_value, 4)
                star = significance_stars(formatted_p)
                df.loc[r, c] = f'{formatted_p} {star}'
                if verbose:
                    print(f'Y = {r}, X = {c}, p = {formatted_p}')
            except Exception as e:
                if verbose:
                    print(f'Granger test failed for ({r}, {c}): {e}')
                df.loc[r, c] = 'err'

    df.columns = [var + '_x' for var in df.columns]
    df.index = [var + '_y' for var in df.index]
    return df


In [139]:
daily_gra = granger_causation_matrix(ts_df, ts_df.columns, 4)   #optimal was 4
#daily_gra.iloc[:10, -2:]
daily_gra

Unnamed: 0,GME_close_x,GME_return_x,IWM_close_x,IWM_return_x,ODP_close_x,ODP_return_x,BBY_close_x,BBY_return_x,FCFS_close_x,FCFS_return_x,sum_comments_x,max_score_x
GME_close_y,,0.0319 **,0.2545,0.2603,0.8764,0.8674,0.4244,0.4399,0.0653 *,0.0385 **,0.0034 ***,0.0044 ***
GME_return_y,0.013 **,,0.4111,0.4211,0.9794,0.8701,0.3072,0.3198,0.2189,0.1636,0.1689,0.0042 ***
IWM_close_y,0.587,0.8745,,0.5785,0.1161,0.1201,0.5516,0.4865,0.0386 **,0.0483 **,0.044 **,0.0843 *
IWM_return_y,0.5789,0.8845,0.5425,,0.1209,0.1224,0.5601,0.4924,0.028 **,0.0365 **,0.0505 *,0.1003
ODP_close_y,0.9559,0.4559,0.4847,0.5331,,0.3876,0.2463,0.2863,0.2167,0.2074,0.7921,0.0542 *
ODP_return_y,0.9744,0.4777,0.5672,0.616,0.4289,,0.3045,0.3439,0.2696,0.2609,0.8187,0.0817 *
BBY_close_y,0.8319,0.4495,0.3719,0.3816,0.0198 **,0.0125 **,,0.879,0.3997,0.3829,0.0865 *,0.0004 ***
BBY_return_y,0.8192,0.4445,0.3802,0.3882,0.0189 **,0.0112 **,0.9045,,0.3758,0.3618,0.1007,0.0008 ***
FCFS_close_y,0.9181,0.9937,0.9985,0.9632,0.0166 **,0.0093 ***,0.4529,0.4524,,0.7014,0.08 *,0.7896
FCFS_return_y,0.9323,0.9996,0.9923,0.9536,0.0137 **,0.0085 ***,0.4097,0.4094,0.6437,,0.0458 **,0.7046


In [147]:
result = granger_causation_matrix(ts_df, ts_df.columns)
#result.iloc[:10, -2:]
result

Unnamed: 0,GME_close_x,GME_return_x,IWM_close_x,IWM_return_x,ODP_close_x,ODP_return_x,BBY_close_x,BBY_return_x,FCFS_close_x,FCFS_return_x,sum_comments_x,max_score_x
GME_close_y,,0.0319 **,0.2545,0.2603,0.8764,0.8674,0.4244,0.4399,0.0653 *,0.0385 **,0.0 ***,0.0044 ***
GME_return_y,0.013 **,,0.4111,0.4211,0.9794,0.8701,0.3072,0.3198,0.2189,0.1636,0.1689,0.0042 ***
IWM_close_y,0.587,0.8745,,0.5785,0.1161,0.1201,0.5516,0.4865,0.0386 **,0.0483 **,0.044 **,0.0843 *
IWM_return_y,0.5789,0.8845,0.5425,,0.1209,0.1224,0.5601,0.4924,0.028 **,0.0365 **,0.0505 *,0.1003
ODP_close_y,0.9559,0.4559,0.4847,0.5331,,0.3876,0.2463,0.2863,0.2167,0.2074,0.7921,0.0542 *
ODP_return_y,0.9744,0.4777,0.5672,0.616,0.4289,,0.3045,0.3439,0.2696,0.2609,0.8187,0.0817 *
BBY_close_y,0.8319,0.4495,0.3719,0.3816,0.0198 **,0.0125 **,,0.879,0.3997,0.3829,0.0865 *,0.0004 ***
BBY_return_y,0.8192,0.4445,0.3802,0.3882,0.0189 **,0.0112 **,0.9045,,0.3758,0.3618,0.1007,0.0008 ***
FCFS_close_y,0.9181,0.9937,0.9985,0.9632,0.0166 **,0.0093 ***,0.4529,0.4524,,0.7014,0.08 *,0.7896
FCFS_return_y,0.9323,0.9996,0.9923,0.9536,0.0137 **,0.0085 ***,0.4097,0.4094,0.6437,,0.0458 **,0.7046


In [149]:
result_5min = granger_causation_matrix(final_5ts_df, final_5ts_df.columns)
#result_5min.iloc[5:-2, -2:]
result_5min

Unnamed: 0,BBY_Close_x,FCFS_Close_x,IWM_Close_x,ODP_Close_x,GME_Close_x,BBY_5min_return_x,FCFS_5min_return_x,IWM_5min_return_x,ODP_5min_return_x,GME_5min_return_x,score_x,num_comments_x
BBY_Close_y,,0.3335,0.0022 ***,0.0061 ***,0.042 **,0.3147,0.5101,0.6342,0.4618,0.8717,0.2105,0.0 ***
FCFS_Close_y,0.7762,,0.046 **,0.3881,0.3173,0.6864,0.0079 ***,0.0447 **,0.0266 **,0.2816,0.8892,0.0148 **
IWM_Close_y,0.9245,0.1409,,0.1374,0.7598,0.6839,0.1445,0.4848,0.0005 ***,0.9788,0.0283 **,0.0407 **
ODP_Close_y,0.2245,0.0572 *,0.6789,,0.0848 *,0.971,0.6447,0.1241,0.0 ***,0.0032 ***,0.1663,0.0017 ***
GME_Close_y,0.0086 ***,0.0365 **,0.0294 **,0.1957,,0.0211 **,0.0126 **,0.0064 ***,0.1245,0.0006 ***,0.0 ***,0.0 ***
BBY_5min_return_y,0.5631,0.5818,0.6472,0.5321,0.0464 **,,0.4588,0.2738,0.2666,0.9418,0.2283,0.0 ***
FCFS_5min_return_y,0.3586,0.0841 *,0.2793,0.4261,0.2927,0.442,,0.0123 **,0.5015,0.1647,0.8377,0.0056 ***
IWM_5min_return_y,0.3586,0.2588,0.3567,0.8858,0.6436,0.4303,0.0875 *,,0.3329,0.959,0.0301 **,0.0285 **
ODP_5min_return_y,0.0334 **,0.7198,0.0157 **,0.0989 *,0.0514 *,0.3024,0.546,0.0194 **,,0.0741 *,0.3621,0.0007 ***
GME_5min_return_y,0.2157,0.0511 *,0.8479,0.3873,0.021 **,0.0793 *,0.0178 **,0.0351 **,0.4666,,0.0102 **,0.0 ***
