Notebook used to inspect the results of the Granger causality analysis.

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import math
import sys
from pathlib import Path

# Project root = parent of the current working directory
# (assumes the kernel is started inside the notebooks/ folder — TODO confirm)
root_dir = Path.cwd().parent

# Make the helper modules in src/scripts importable. The membership check keeps
# this cell idempotent: re-running it does not append the same entry again.
scripts_dir = str(root_dir / "src" / "scripts")
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

# Custom plotting helpers: import currently disabled. The plotting cells at the
# end of the notebook call these two functions, so re-enable it before running them.
# from plot_granger_causality import plot_aini_lags_by_year, plot_aini_lags_for_year

In [3]:
# Resolve the directories used by this notebook, relative to the project root
# (the parent of the current working directory).
root_dir = Path.cwd().parent
var_path = root_dir / "data" / "processed" / "variables"   # input CSVs
table_path = root_dir / "reports" / "tables"                # report tables

# Load the Granger-causality result tables, one CSV per file suffix
# (binary, w0, w1, w2), in that order.
gc_c, gc_w0, gc_w1, gc_w2 = (
    pd.read_csv(var_path / csv_name)
    for csv_name in (
        "granger_causality_binary.csv",
        "granger_causality_w0.csv",
        "granger_causality_w1.csv",
        "granger_causality_w2.csv",
    )
)

# HTML export for the online appendix (disabled; refers to a frame name that is
# not created in this notebook):
# aini_to_ret_w1_df.to_html(table_path / "gc_bootstrap_F_all_lags_groupwise_AINI_to_return_3class_w1.html", index=False)

# Show which columns the w1 table provides
gc_w1.columns

Index(['Ticker', 'AINI_variant', 'Year', 'Direction', 'A2R_beta_const',
       'A2R_beta_ret_lag1', 'A2R_beta_x_lag1', 'A2R_beta_x_lag2',
       'A2R_beta_x_lag3', 'p_x', 'N_obs', 'N_boot', 'N_boot_valid', 'F_stat',
       'df_num', 'df_den', 'Original_F_pval', 'Empirical_F_pval', 'r2_u',
       'adj_r2_u', 'R2A_beta_const', 'R2A_beta_x_lag1', 'R2A_beta_x_lag2',
       'R2A_beta_x_lag3', 'R2A_beta_ret_lag1', 'BH_reject_F', 'BH_corr_F_pval',
       'BH_reject_F_HC3', 'BH_corr_F_pval_HC3'],
      dtype='object')

In [None]:
# Keep only the rows that are significant after Benjamini-Hochberg correction:
# a row is retained when either BH-adjusted F-test p-value (the plain one or
# the HC3 one) is below alpha.
alpha = 0.05


def _significant_rows(gc_results, level):
    """Return a copy of the rows of `gc_results` with a BH-adjusted p-value below `level`.

    Parameters
    ----------
    gc_results : pandas.DataFrame
        Must contain the columns "BH_corr_F_pval" and "BH_corr_F_pval_HC3".
    level : float
        Significance threshold; comparison is strict (`<`).

    Returns
    -------
    pandas.DataFrame
        Independent copy of the matching rows. Rows whose p-values are both
        missing never match, because NaN comparisons evaluate to False.
    """
    is_significant = (gc_results["BH_corr_F_pval"] < level) | (
        gc_results["BH_corr_F_pval_HC3"] < level
    )
    return gc_results[is_significant].copy()


gc_c_sub = _significant_rows(gc_c, alpha)
gc_w0_sub = _significant_rows(gc_w0, alpha)
gc_w1_sub = _significant_rows(gc_w1, alpha)
gc_w2_sub = _significant_rows(gc_w2, alpha)

# Preview the significant w1 results
gc_w1_sub

In [None]:
# Column selections intended for the thesis tables.
# NOTE(review): "coef_x1" appears twice, and "coef_x1", "Empirical_F_p" and
# "BH_corr_F" are not among the columns printed by `gc_w1.columns` earlier in
# this notebook (which lists e.g. "Empirical_F_pval" and "BH_corr_F_pval").
# Confirm the intended names before indexing a frame with these lists.
gc_table_cols = [
    "Ticker",
    "AINI_variant",
    "Year",
    "coef_x1",
    "coef_x1",
    "Empirical_F_p",
    "BH_corr_F",
]

# Currently the same selection, kept as a separate list object.
test_table_cols = list(gc_table_cols)

Inspect results 

In [None]:
# Inspect the columns of the significant w1 subset.
# NOTE(review): this cell previously referenced `aini_to_ret_w1_df_sub`, which
# is not defined in any cell of this notebook and raised a NameError on a fresh
# kernel. `gc_w1_sub` is assumed to be the intended frame — confirm.
gc_w1_sub.columns

Create reporting tables for thesis

In [None]:
# NOTE(review): `gc_df_sign` is not created in any cell visible in this
# notebook, and the columns used here ("Variable", "AIC", "BIC", "p_*") are not
# among those printed for gc_w1 earlier — confirm where this frame comes from.

# --- Lowest AIC / BIC row within each (Ticker, Variable, Year) group ---
by_ticker_variable_year = gc_df_sign.groupby(["Ticker", "Variable", "Year"])
idx_best_aic = by_ticker_variable_year["AIC"].idxmin()
idx_best_bic = by_ticker_variable_year["BIC"].idxmin()

# Adds boolean flag columns to gc_df_sign in place
gc_df_sign["best_aic_ticker_variable_year"] = gc_df_sign.index.isin(idx_best_aic)
gc_df_sign["best_bic_ticker_variable_year"] = gc_df_sign.index.isin(idx_best_bic)

# --- Lowest AIC / BIC row within each (Ticker, Year) group, across all variables ---
by_ticker_year = gc_df_sign.groupby(["Ticker", "Year"])
idx_best_aic_ty = by_ticker_year["AIC"].idxmin()
idx_best_bic_ty = by_ticker_year["BIC"].idxmin()

gc_df_sign["best_aic_ticker_year"] = gc_df_sign.index.isin(idx_best_aic_ty)
gc_df_sign["best_bic_ticker_year"] = gc_df_sign.index.isin(idx_best_bic_ty)

# --- p-value columns: every column starting with "p_" whose name does not contain "const" ---
aini_p_cols = [
    name
    for name in gc_df_sign.columns
    if name.startswith("p_") and "const" not in name
]

# Rows in which at least one of those p-values is below 0.05 (shared by both filters)
has_significant_lag = (gc_df_sign[aini_p_cols] < 0.05).any(axis=1)

# --- Filter 1: best AIC or BIC per (Ticker, Variable, Year) and a significant p-value ---
best_per_ticker_variable_year = (
    gc_df_sign["best_aic_ticker_variable_year"]
    | gc_df_sign["best_bic_ticker_variable_year"]
)
gc_df_best_tv_y = gc_df_sign.loc[best_per_ticker_variable_year & has_significant_lag]

# --- Filter 2: best AIC or BIC per (Ticker, Year) and a significant p-value ---
best_per_ticker_year = (
    gc_df_sign["best_aic_ticker_year"] | gc_df_sign["best_bic_ticker_year"]
)
gc_df_best_t_y = gc_df_sign.loc[best_per_ticker_year & has_significant_lag]

# --- Export Filter 1 (ticker-variable-year): HTML only ---
gc_df_best_tv_y.to_html(
    table_path / "granger_causality_best_aic_or_bic_ticker_variable_year.html",
    na_rep='',
)

# --- Export Filter 2 (ticker-year): HTML to the tables folder, CSV to the variables folder ---
gc_df_best_t_y.to_html(
    table_path / "granger_causality_best_aic_or_bic_ticker_year.html",
    na_rep='',
)
gc_df_best_t_y.to_csv(
    var_path / "granger_causality_best_aic_or_bic_ticker_year.csv",
    index=False,
)

# Preview of the ticker-year selection
gc_df_best_t_y


Re-run the Granger causality analysis to include t-statistics for the variables with the lowest AIC/BIC

In [None]:
# Re-index the results by year (this is a set_index, not a groupby); the
# re-indexed frame is what the per-lag plotting loop below receives.
# NOTE(review): `gc_df` is not defined in any cell visible in this notebook —
# confirm where it is loaded.
gc_df_index = gc_df.set_index('Year')
# Bare expression that is not the last line of the cell, so nothing is displayed.
gc_df.columns
# NOTE(review): `gc_df_sort` is not assigned in any visible cell — verify.
gc_df_sort

In [None]:
# Draw one by-year plot per lag label, 't-1' through 't-20'.
# NOTE(review): plot_aini_lags_by_year has no active import in this notebook
# (the import in the first cell is commented out) — confirm it is in scope.
max_lag = 20
lag_list = [f't-{i}' for i in range(1, max_lag + 1)]

for lag_label in lag_list:
    plot_aini_lags_by_year(gc_df_index, lag=lag_label)

In [None]:
# One figure per year, in chronological order. Iterating a plain set() gives no
# ordering guarantee (e.g. 2023-2025 can come out as 2024, 2025, 2023), so the
# distinct years are sorted explicitly; rows with a missing year are skipped.
# NOTE(review): plot_aini_lags_for_year has no active import in this notebook
# (the import in the first cell is commented out) — confirm it is in scope.
for year in sorted(gc_df["Year"].dropna().unique()):
    plot_aini_lags_for_year(gc_df, year)