Notebook used to estimate Granger Causality between AINI variables and log returns.

In [None]:
import pandas as pd
import numpy as np
import sys
from pathlib import Path

# Project root = parent of the current working directory
# (assumes the notebook is started from a direct subfolder of the project, e.g. notebooks/)
project_root = Path.cwd().parent

# Make the helper modules in src/scripts and src/modelling importable (order preserved)
sys.path.extend(
    str(project_root / "src" / package) for package in ("scripts", "modelling")
)

# import custom functions
from estimate_granger_causality import  run_gc_mbboot_fdr
from estimate_OLS import run_sameday_ols_mbboot_fdr

Calculate Granger Causality and Transfer Entropy for the binary (0 / 1) AINI

load & verify data

In [None]:
 # get variables path
var_path = project_root / "data" / "processed" / "variables"

# get financial data path
fin_path = project_root / "data" / "raw" / "financial"

# load financial data
fin_data = pd.read_csv(fin_path / "full_daily_2023_2025.csv")

# load AINI predictions: one file per AINI version (w0, w1, w2, binary).
# Each window version must read its own file; loading the w0 file for w1 and w2
# would make the three frames identical.
# NOTE(review): the w1_/w2_ file names follow the "w0_AINI_variables.csv" naming
# pattern -- confirm both files exist in data/processed/variables.
aini_data_w0 = pd.read_csv(var_path / "w0_AINI_variables.csv")
aini_data_w1 = pd.read_csv(var_path / "w1_AINI_variables.csv")
aini_data_w2 = pd.read_csv(var_path / "w2_AINI_variables.csv")
aini_data_binary = pd.read_csv(var_path / "binary_AINI_variables.csv")


create log returns

Create subsets for different periods

In [None]:
# Re-read the raw file so this cell gives the same result when re-run,
# even after later cells have rebound or modified `fin_data`.
fin_data = pd.read_csv(fin_path / "full_daily_2023_2025.csv")

# Parsed copy of the raw 'Date' column; all period filters below use it
fin_data['date'] = pd.to_datetime(fin_data['Date'])

# Period boundaries. Every period is a half-open interval [start, end),
# so no observation can fall into two adjacent calendar years.
threshold_23 = pd.Timestamp('2023-12-31')  # not used by the filters below; kept in case later cells reference it
threshold_24 = pd.Timestamp('2024-01-01')
threshold_25 = pd.Timestamp('2025-01-01')

# Single calendar years
fin_data_23 = fin_data[fin_data['date'] < threshold_24]
fin_data_24 = fin_data[(fin_data['date'] >= threshold_24) & (fin_data['date'] < threshold_25)]
fin_data_25 = fin_data[fin_data['date'] >= threshold_25]

# Overlapping two-year windows.
# 2023-24 must end strictly before 2025-01-01; `<=` would let a 2025-01-01
# observation leak into the 2023-24 sample.
fin_data_23_24 = fin_data[fin_data['date'] < threshold_25]
fin_data_24_25 = fin_data[fin_data['date'] >= threshold_24]

# Lookup of period label -> financial data subset, passed to the estimation calls.
# NOTE(review): these frames are built before the `log_return` column is computed
# in the following cell, so they do not contain it -- presumably
# run_gc_mbboot_fdr derives returns itself; confirm.
fin_data_by_year = {
    2023: fin_data_23,
    2024: fin_data_24,
    2025: fin_data_25,
    "2023_24": fin_data_23_24,
    "2024_25": fin_data_24_25,
    "2023_24_25": fin_data
}

In [None]:
# Parse the dates in place, then order rows chronologically within each ticker
fin_data['Date'] = pd.to_datetime(fin_data['Date'])
fin_data = fin_data.sort_values(by=['Ticker', 'Date'])

# Per-ticker log return: first difference of the log adjusted close
adj_close_by_ticker = fin_data.groupby('Ticker')['Adj Close']
fin_data['log_return'] = adj_close_by_ticker.transform(lambda prices: np.log(prices).diff())

# Drop rows without a log return (e.g. the first observation of every ticker)
fin_data = fin_data.dropna(subset=['log_return'])

# create distinct df of log_returns & ticker
ticker_dfs = {}
fin_data

Estimate Granger Causality and reverse Granger Causality between AINI variables & Stocks / ETFs

In [None]:
# Run the Granger-causality estimation once per AINI version.
# Each call is paired with the AINI frame that matches its `version` label;
# passing the w0 frame to the w1 / w2 runs would only repeat the w0 analysis
# under a different label.

# w0 run is currently disabled
#w0_df =  run_gc_mbboot_fdr(
    #aini_df = aini_data_w0,
    #fin_data_by_year = fin_data_by_year,
    #version = "w0",
    #n_boot = 10000
#)

w1_df = run_gc_mbboot_fdr(
    aini_df = aini_data_w1,
    fin_data_by_year = fin_data_by_year,
    version = "w1",
    n_boot = 10000
)

w2_df = run_gc_mbboot_fdr(
    aini_df = aini_data_w2,
    fin_data_by_year = fin_data_by_year,
    version = "w2",
    n_boot = 10000
)

# The binary AINI gets its own version label instead of re-using "w2".
# NOTE(review): "binary" is inferred from the "binary_AINI_variables.csv" naming --
# confirm it is a label run_gc_mbboot_fdr accepts.
binary_df = run_gc_mbboot_fdr(
    aini_df = aini_data_binary,
    fin_data_by_year = fin_data_by_year,
    version = "binary",
    n_boot = 10000
)

Estimate Granger Causality for polarity AINI with window size (w) = 1