In [3]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests
import warnings
warnings.filterwarnings("ignore")

# Settings shared by the Granger-causality cells further down.
max_lag = 30                 # highest lag order handed to grangercausalitytests
significance_level = 0.05    # p-value threshold below which a lag counts as significant

# Input table (the file name suggests hourly observations -- TODO confirm).
df = pd.read_csv('hourly_data_ff.csv')

# Candidate predictors: every column except 'vol' (used as the explained
# series by the test loop) and 'timestamp'. Index.difference returns the
# remaining labels sorted and de-duplicated.
features = df.columns.difference(
    ['vol', 'timestamp']
)

# Populated by the test loop: feature name -> list of significant lags.
useful_features = dict()


In [4]:
# Convenience handle on the 'vol' column. None of the cells visible here read
# it (they index df['vol'] directly), so it may only matter to later cells.
signal = df.loc[:, 'vol']

In [5]:
# For each candidate column, test whether it Granger-causes 'vol'.
# grangercausalitytests checks whether the SECOND column helps predict the
# FIRST, hence the ['vol', feature] ordering.
for feature in features:
    print(f"\nTesting Granger causality for feature: {feature}")
    try:
        # Drop rows where either series is missing before fitting.
        pair = df[['vol', feature]].dropna()
        test_result = grangercausalitytests(pair, maxlag=max_lag, verbose=False)

        # test_result[lag][0]['ssr_ftest'] is (F, p-value, df_denom, df_num);
        # collect the p-value for every lag order 1..max_lag.
        p_values = {
            lag: test_result[lag][0]['ssr_ftest'][1]
            for lag in range(1, max_lag + 1)
        }
        significant_lags = [
            lag for lag, p_value in p_values.items()
            if p_value < significance_level
        ]

        if not significant_lags:
            print("  No significant lags")
        else:
            useful_features[feature] = significant_lags
            print(f"  Significant lags: {significant_lags}")
    except Exception as e:
        # Best-effort: report the failure and move on to the next feature.
        print(f"  Error testing feature {feature}: {e}")


Testing Granger causality for feature: ask_depth
  Significant lags: [1, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]

Testing Granger causality for feature: ask_slope
  No significant lags

Testing Granger causality for feature: ask_volume
  No significant lags

Testing Granger causality for feature: bid_depth
  Significant lags: [1, 2, 3, 4, 5, 6, 7, 8, 9]

Testing Granger causality for feature: bid_slope
  No significant lags

Testing Granger causality for feature: bid_volume
  Significant lags: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]

Testing Granger causality for feature: difference
  No significant lags

Testing Granger causality for feature: spread
  Significant lags: [1, 2, 3, 4, 5, 6, 7, 8]

Testing Granger causality for feature: volume_difference
  Significant lags: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]



In [6]:
# Recap: one line per feature that had at least one significant lag.
for feat in useful_features:
    lags = useful_features[feat]
    print(f"{feat}: lags {lags}")

ask_depth: lags [1, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
bid_depth: lags [1, 2, 3, 4, 5, 6, 7, 8, 9]
bid_volume: lags [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
spread: lags [1, 2, 3, 4, 5, 6, 7, 8]
volume_difference: lags [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]


In [7]:
# Sentiment table, plus its 'Aggregate sentiment' column as a bare array
# (via .values, i.e. without the pandas index).
sent = pd.read_csv('sentiment.csv')
sentiment = sent.loc[:, 'Aggregate sentiment'].values

In [8]:
# Granger causality tests: do 'Aggregate sentiment' and 'fng_feature' cause volatility?
# Uses the notebook-wide max_lag and significance_level (defined in the
# configuration cell) so these tests stay consistent with the per-feature loop.


def _granger_sentiment_vs_vol(vol, sentiment_values, label, alias, maxlag, alpha):
    """Test whether a sentiment series Granger-causes volatility.

    The sentiment values are paired with ``vol`` by row POSITION (first value
    with first row, and so on) and both are truncated to their common length.
    NOTE(review): nothing here checks that the two tables cover the same
    timestamps at the same frequency -- confirm the positional pairing is valid.

    Parameters
    ----------
    vol : pandas.Series
        Volatility series (the explained variable).
    sentiment_values : array-like
        Sentiment observations (the candidate cause).
    label : str
        Human-readable name used in the printed messages.
    alias : str
        Column name given to the sentiment series in the paired frame.
    maxlag : int
        Highest lag order to test.
    alpha : float
        p-value threshold below which a lag is reported as significant.

    Returns
    -------
    tuple
        ``(series, pair, test_result, significant_lags)``: the re-indexed
        sentiment series, the NaN-free two-column frame that was tested, the
        raw ``grangercausalitytests`` result (``None`` if the test raised),
        and the list of significant lag orders (empty if none or on error).
    """
    print(f"\nTesting Granger causality between {label} and volatility:")

    # Truncate to the shorter input. Building the Series against
    # vol.index[:len(sentiment_values)] alone raises a length-mismatch
    # ValueError whenever the sentiment table is longer than the volatility table.
    n_obs = min(len(vol), len(sentiment_values))
    if n_obs < len(sentiment_values):
        print(f"Note: {label} has {len(sentiment_values)} rows but volatility has "
              f"{len(vol)}; using the first {n_obs}.")

    series = pd.Series(sentiment_values[:n_obs], index=vol.index[:n_obs])
    pair = pd.DataFrame({'vol': vol.iloc[:n_obs], alias: series}).dropna()

    test_result = None
    significant_lags = []
    try:
        # Column order matters: the test asks whether the SECOND column
        # helps predict the FIRST.
        test_result = grangercausalitytests(pair[['vol', alias]], maxlag=maxlag, verbose=False)
        # ssr_ftest is (F, p-value, df_denom, df_num); index 1 is the p-value.
        significant_lags = [
            lag for lag in range(1, maxlag + 1)
            if test_result[lag][0]['ssr_ftest'][1] < alpha
        ]
        if significant_lags:
            print(f"Significant lags where {label} Granger-causes volatility: {significant_lags}")
        else:
            print(f"No significant lags found for {label} causing volatility.")
    except Exception as e:
        # Best-effort: report the failure so the other test can still run.
        print(f"Error during Granger causality test for {label}: {e}")

    return series, pair, test_result, significant_lags


# Test using Aggregate sentiment
agg_sent_series, vol_and_agg, agg_test_result, agg_significant_lags = _granger_sentiment_vs_vol(
    df['vol'], sent['Aggregate sentiment'].values, 'Aggregate sentiment', 'agg_sent',
    max_lag, significance_level,
)

# Test using fng_feature
fng_series, vol_and_fng, fng_test_result, fng_significant_lags = _granger_sentiment_vs_vol(
    df['vol'], sent['fng_feature'].values, 'fng_feature', 'fng',
    max_lag, significance_level,
)


Testing Granger causality between Aggregate sentiment and volatility:
Significant lags where Aggregate sentiment Granger-causes volatility: [1, 2]

Testing Granger causality between fng_feature and volatility:
Significant lags where fng_feature Granger-causes volatility: [1, 2, 3, 4, 5, 6, 7, 8, 18, 19]
