In [14]:
import pandas as pd
from statsmodels.tsa.stattools import grangercausalitytests
import warnings
warnings.filterwarnings("ignore")

# Hourly dataset; the Granger tests below use its 'vol' column as the target.
df = pd.read_csv('hourly_data_ff.csv')

# Test settings: highest lag order to try, and the p-value cutoff.
max_lag, significance_level = 30, 0.05

# Candidate predictors: every column other than the target and the timestamp.
features = df.columns.difference(['vol', 'timestamp'])

# Maps feature name -> lag orders found significant (filled by the test loop).
useful_features = dict()


In [46]:
# The 'vol' column as a standalone Series.
signal = df.loc[:, 'vol']

In [2]:
# Screen each candidate feature: does its history help predict 'vol'?
#
# Start from an empty mapping so that re-running this cell never keeps entries
# left over from an earlier run (for example after the data, max_lag or
# significance_level changed). clear() empties the dict in place, so any
# existing reference to useful_features stays valid.
#
# NOTE(review): every lag of every feature is compared against the same cutoff
# with no multiple-comparison correction, and stationarity is not checked
# here -- treat the significant lags as a screening result to be confirmed.
useful_features.clear()

for feature in features:
    print(f"\nTesting Granger causality for feature: {feature}")
    try:
        # Column order matters: grangercausalitytests asks whether the SECOND
        # column Granger-causes the FIRST, so 'vol' must stay first.
        # Rows with a missing value in either column are dropped per pair.
        test_result = grangercausalitytests(df[['vol', feature]].dropna(), maxlag=max_lag, verbose=False)
        # test_result[lag][0]['ssr_ftest'] is a tuple whose element [1] is the
        # p-value of the SSR-based F test at that lag order.
        significant_lags = [
            lag for lag in range(1, max_lag + 1)
            if test_result[lag][0]['ssr_ftest'][1] < significance_level
        ]
        if significant_lags:
            useful_features[feature] = significant_lags
            print(f"  Significant lags: {significant_lags}")
        else:
            print("  No significant lags")
    except Exception as e:
        # Deliberately best-effort: a feature that cannot be tested is reported
        # and skipped so the remaining features are still screened.
        print(f"  Error testing feature {feature}: {e}")


Testing Granger causality for feature: ask_depth
  Significant lags: [1, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]

Testing Granger causality for feature: ask_slope
  No significant lags

Testing Granger causality for feature: ask_volume
  No significant lags

Testing Granger causality for feature: bid_depth
  Significant lags: [1, 2, 3, 4, 5, 6, 7, 8, 9]

Testing Granger causality for feature: bid_slope
  No significant lags

Testing Granger causality for feature: bid_volume
  Significant lags: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]

Testing Granger causality for feature: difference
  No significant lags

Testing Granger causality for feature: spread
  Significant lags: [1, 2, 3, 4, 5, 6, 7, 8]

Testing Granger causality for feature: volume_difference
  Significant lags: [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]



In [3]:
# Summary: one line per feature that had at least one significant lag.
for name in useful_features:
    print(f"{name}: lags {useful_features[name]}")

ask_depth: lags [1, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
bid_depth: lags [1, 2, 3, 4, 5, 6, 7, 8, 9]
bid_volume: lags [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]
spread: lags [1, 2, 3, 4, 5, 6, 7, 8]
volume_difference: lags [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]


In [5]:
# Sentiment data; only the 'Aggregate sentiment' column is used, as a NumPy array.
sent = pd.read_csv('sentiment.csv')
sentiment = sent['Aggregate sentiment'].to_numpy()

In [7]:
# Granger causality test: does sentiment cause volatility?
# Uses max_lag and significance_level from the configuration cell, so this test
# always runs with the same settings as the per-feature screen.

# Pair the two series row by row over the rows both inputs have. Truncating to
# the shorter length avoids the data/index length mismatch (ValueError) that
# the Series constructor raises when sentiment has more rows than df.
# NOTE(review): the pairing is purely positional -- it assumes row i of
# sentiment.csv covers the same time period as row i of df; confirm against
# the timestamps.
n_obs = min(len(df), len(sentiment))
sentiment_series = pd.Series(sentiment[:n_obs], index=df.index[:n_obs])

# Keep only the rows where both values are present.
vol_and_sent = pd.DataFrame({
    'vol': df['vol'].iloc[:n_obs],
    'sentiment': sentiment_series
}).dropna()

print("\nTesting Granger causality between sentiment and volatility:")
try:
    # Column order matters: the test asks whether the second column
    # ('sentiment') Granger-causes the first ('vol').
    test_result = grangercausalitytests(vol_and_sent[['vol', 'sentiment']], maxlag=max_lag, verbose=False)
    # Element [1] of the 'ssr_ftest' tuple is the p-value at that lag order.
    significant_lags = [
        lag for lag in range(1, max_lag + 1)
        if test_result[lag][0]['ssr_ftest'][1] < significance_level
    ]
    if significant_lags:
        print(f"Significant lags where sentiment Granger-causes volatility: {significant_lags}")
    else:
        print("No significant lags found for sentiment causing volatility.")
except Exception as e:
    # Best-effort: report the failure instead of stopping the notebook.
    print(f"Error during Granger causality test: {e}")


Testing Granger causality between sentiment and volatility:
Significant lags where sentiment Granger-causes volatility: [1, 2]
Significant lags where sentiment Granger-causes volatility: [1, 2]
