This notebook estimates the transfer entropy between the AINI variables and the log returns of stocks / ETFs.

In [None]:
import pandas as pd
import numpy as np
from pathlib import Path
import importlib
import sys

# Project root is taken to be the parent of the current working directory
# (presumably the notebook is launched from the project's notebooks/ folder --
# TODO confirm).
project_root = Path.cwd().parent

# Make src/modelling importable; the `transfer_entropy` import below relies on it.
# Register the directory only once so that re-running this cell does not keep
# appending duplicate entries to sys.path.
modelling_dir = str(project_root / "src" / "modelling")
if modelling_dir not in sys.path:
    sys.path.append(modelling_dir)

from transfer_entropy import estimate_te

In [None]:
 # get variables path
var_path = project_root.joinpath("data", "processed", "variables")

# Directory holding the raw financial data
fin_path = project_root.joinpath("data", "raw", "financial")

# Daily financial data
fin_csv = fin_path.joinpath("full_daily_2023_2025.csv")
fin_data = pd.read_csv(fin_csv)

# Binary AINI predictions (0 to 1 index)
binary_aini_csv = var_path.joinpath("binary_AINI_variables.csv")
aini_data = pd.read_csv(binary_aini_csv)

# Polarity AINI predictions, windowsize=1
w1_aini_csv = var_path.joinpath("w1_AINI_variables.csv")
w1_aini_data = pd.read_csv(w1_aini_csv)

In [None]:
# Parse the dates and order rows chronologically within each ticker, so that
# every row's predecessor is the previous observation of the same ticker.
fin_data['Date'] = pd.to_datetime(fin_data['Date'])
fin_data = fin_data.sort_values(by=['Ticker', 'Date'])

# Log return per ticker: log(P_t) - log(P_{t-1}) on the adjusted close.
fin_data['LogReturn'] = (
    fin_data
    .groupby('Ticker')['Adj Close']
    .transform(lambda prices: np.log(prices).diff())
)

# The first observation of each ticker has no predecessor, so its return is NaN
# and the row is removed.
# NOTE: `fin_data` is rebound here, so re-running this cell without reloading
# the data drops one more leading row per ticker.
fin_data = fin_data.dropna(subset=['LogReturn'])

# Sanity check: preview and per-column missing-value flags,
# first for the financial data, then for the AINI data.
for frame in (fin_data, aini_data):
    print(frame.head())
    print(frame.isna().any())

In [None]:
# Ensure both date columns are datetime so they can be compared with Timestamps
fin_data['Date'] = pd.to_datetime(fin_data['Date'])
aini_data['date'] = pd.to_datetime(aini_data['date'])

# Year boundaries. All subsets below use half-open intervals [start, end), so
# every observation lands in exactly one single-year subset.
threshold_23 = pd.Timestamp('2023-12-31')  # no longer used for filtering; kept in case other cells reference it
threshold_24 = pd.Timestamp('2024-01-01')
threshold_25 = pd.Timestamp('2025-01-01')

# Single-year subsets
fin_data_23 = fin_data[fin_data['Date'] < threshold_24]
fin_data_24 = fin_data[(fin_data['Date'] >= threshold_24) & (fin_data['Date'] < threshold_25)]
fin_data_25 = fin_data[fin_data['Date'] >= threshold_25]

# Multi-year subsets.
# 2023-24 must stop strictly before 2025-01-01: that date belongs to
# fin_data_25 (which starts at `>= threshold_25`), so `<=` would leak it in.
fin_data_23_24 = fin_data[fin_data['Date'] < threshold_25]
fin_data_24_25 = fin_data[fin_data['Date'] >= threshold_24]

# Period label -> financial subset. Multi-year labels are strings; the full
# sample is the unfiltered frame.
fin_data_by_year = {
    2023: fin_data_23,
    2024: fin_data_24,
    2025: fin_data_25,
    "2023_24": fin_data_23_24,
    "2024_25": fin_data_24_25,
    "2023_24_25": fin_data,
}

Estimate Transfer Entropy between binary AINI variables & Stocks / ETFs

In [None]:
# Run the TE estimation for every (period, AINI variant) combination.

# AINI series used as TE sources
aini_variants = [
    "normalized_AINI",
    "MA_7",
    "EMA_06",
    "EMA_08",
]

# Period label -> financial subset.
# Multi-year labels must be strings: a bare 2023_24 is the *integer* literal
# 202324, because underscores in numeric literals are only digit separators.
fin_data_by_year = {
    2023: fin_data_23,
    2024: fin_data_24,
    2025: fin_data_25,
    "2023_24": fin_data_23_24,
    "2024_25": fin_data_24_25,
    "2023_24_25": fin_data,
}
all_te_results = []

# NOTE(review): `get_ticker_for_TE` is neither defined nor imported in the
# visible cells of this notebook. If it is not in scope, every iteration fails
# with a NameError that the handler below only prints -- confirm where it lives.
# The loop variable is deliberately NOT called `fin_data`, so the notebook-level
# full data set is never rebound to a subset (e.g. if the run is interrupted).
for year, fin_subset in fin_data_by_year.items():
    for aini_var in aini_variants:
        print(f"\n⏳ Processing {aini_var} for {year}...")
        try:
            array_dict = get_ticker_for_TE(fin_subset, aini_data, aini_var, year)
            results_df = estimate_te(
                array_dict, year, aini_var,
                max_lag_sources=1,
                n_perm=200,
                save=False  # results are written once, combined, after the loops
            )
            all_te_results.append(results_df)
            print(f"✅ Done: {aini_var} for {year}")

        except Exception as e:
            # Best effort: report the failure and continue with the next combination
            print(f"❌ Failed: {aini_var} for {year} → {e}")

# Combine and save after all loops
if all_te_results:
    combined_df = pd.concat(all_te_results, ignore_index=True)
    combined_path = var_path / "combined_te_results.csv"
    combined_df.to_csv(combined_path, index=False)
    print(f"\n📄 All TE results saved to: {combined_path}")
else:
    # Nothing succeeded: make that explicit instead of leaving `combined_df`
    # undefined or pointing at results from another cell.
    combined_df = pd.DataFrame()
    print("\n⚠️ No TE results were produced; nothing was saved.")
combined_df

Estimate transfer entropy between the polarity AINI variables (window size = 1) and the log returns of stocks / ETFs

In [None]:
# AINI series used as TE sources for the window-1 polarity predictions
aini_variants = [
    "normalized_AINI",
    "EMA_08",
]

# Period label -> financial subset
fin_data_by_year = {
    "2023": fin_data_23,
    "2024": fin_data_24,
    "2025": fin_data_25,
    "2023_24": fin_data_23_24,
    "2024_25": fin_data_24_25,
    "2023_24_25": fin_data,
}

all_te_results = []

# The loop variable is deliberately NOT called `fin_data`, so the notebook-level
# full data set is never rebound to a subset (e.g. if the run is interrupted).
for year, fin_subset in fin_data_by_year.items():
    for aini_var in aini_variants:
        print(f"\n⏳ Processing {aini_var} for {year}...")

        try:
            results_df = estimate_te(
                year=year,
                fin_data=fin_subset,
                aini_data=w1_aini_data,
                aini_vars=[aini_var],  # one source variable per call
                target_name="LogReturn",
                max_lag_sources=2,
                n_perm=200,
                k_list=[2, 3, 4],
                save=False,  # results are written once, combined, after the loops
                window=1
            )

            all_te_results.append(results_df)
            print(f"✅ Done: {aini_var} for {year}")

        except Exception as e:
            # Best effort: report the failure and continue with the next combination
            print(f"❌ Failed: {aini_var} for {year} → {e}")

# Combine and save
if all_te_results:
    combined_df = pd.concat(all_te_results, ignore_index=True)
    combined_path = var_path / "combined_te_results_window_1.csv"
    combined_df.to_csv(combined_path, index=False)
    print(f"\n📄 All TE results saved to: {combined_path}")
else:
    # Without this branch the final expression would raise NameError on a fresh
    # kernel, or silently display `combined_df` left over from an earlier cell.
    combined_df = pd.DataFrame()
    print("\n⚠️ No TE results were produced; nothing was saved.")
combined_df

Estimate Transfer Entropy between polarity AINI variables & Stocks / ETFs for windowsize = 1