Notebook used to inspect results of Granger Causality analysis

In [65]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import math
import sys
from pathlib import Path
import re

# Get the project root: notebooks/AI_narrative_index
root_dir = Path.cwd().parent

# Add needed folders to the Python modules search path
sys.path.append(str(root_dir / "src" / "scripts"))
sys.path.append(str(root_dir / "src" / "visualizations"))
sys.path.append(str(root_dir / "src" / "modelling"))

# import custom functions
#rom plot_granger_causality import plot_aini_lags_by_year, plot_aini_lags_for_year
from construct_tables import make_mrkdwn_tables, markdown_tables_to_pptx, make_mrkdwn_tables_focus
from compute_rejections import compute_rejection_rates, export_rejection_rates_to_pptx

In [62]:
# define path to variables
var_path = root_dir / "data" / "processed" / "variables"
 
# load data (without VIX)
gc_c = pd.read_csv(var_path / "granger_causality_binary.csv")
gc_w0 = pd.read_csv(var_path / "granger_causality_w0.csv")
gc_w1 = pd.read_csv(var_path / "granger_causality_w1.csv")
gc_w2 = pd.read_csv(var_path / "granger_causality_w2.csv")

# merge them together
gc_all_results = pd.concat([gc_c, gc_w0, gc_w1, gc_w2], ignore_index=True)

# save merged results
gc_all_results.to_csv(var_path / "granger_causality_all.csv", index=False)

# define table path
table_path = root_dir / "reports" / "tables"

# Export as HTML for online appendix
gc_c.to_html(table_path / "granger_causality_custom_model.html", index=False)
gc_w0.to_html(table_path / "granger_causality_w0.html", index=False)
gc_w1.to_html(table_path / "granger_causality_w1.html", index=False)
gc_w2.to_html(table_path / "granger_causality_w2.html", index=False)
gc_all_results

Unnamed: 0,Ticker,AINI_variant,Year,Direction,A2R_beta_const,A2R_beta_ret_lag1,A2R_beta_x_lag1,A2R_beta_x_lag2,A2R_beta_x_lag3,p_x,...,adj_r2_u,R2A_beta_const,R2A_beta_x_lag1,R2A_beta_x_lag2,R2A_beta_x_lag3,R2A_beta_ret_lag1,BH_reject_F,BH_corr_F_pval,BH_reject_F_HC3,BH_corr_F_pval_HC3
0,AAPL,EMA_02,2023,AINI_to_RET,0.001593,0.076437,0.007926,0.000833,0.001452,3,...,-0.015701,,,,,,False,0.992701,False,0.990594
1,AAPL,EMA_02,2023,RET_to_AINI,,,,,,3,...,0.580983,-0.021349,0.875858,-0.184422,0.027947,-0.071632,False,0.599540,False,0.575194
2,AAPL,EMA_08,2023,AINI_to_RET,0.001013,0.075802,0.000663,0.003511,-0.001821,3,...,-0.015893,,,,,,False,0.992701,False,0.990594
3,AAPL,EMA_08,2023,RET_to_AINI,,,,,,3,...,0.044320,-0.065671,0.246771,-0.065085,-0.029918,-0.365295,False,0.510749,False,0.474490
4,AAPL,normalized_AINI,2023,AINI_to_RET,0.000786,0.076543,0.001342,0.004588,-0.006561,3,...,-0.013486,,,,,,False,0.992701,False,0.990594
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2875,TSM,EMA_08,2024_25,RET_to_AINI,,,,,,3,...,0.277207,-0.000799,0.347678,0.170951,0.147330,0.049074,False,0.653135,False,0.622662
2876,TSM,normalized_AINI,2024_25,AINI_to_RET,0.001607,-0.101063,0.023602,-0.079523,0.000066,3,...,0.017862,,,,,,True,0.019298,True,0.006446
2877,TSM,normalized_AINI,2024_25,RET_to_AINI,,,,,,3,...,0.163026,-0.000917,0.210863,0.222601,0.135797,0.047294,False,0.653135,False,0.622662
2878,TSM,normalized_AINI_z,2024_25,AINI_to_RET,0.001698,-0.101063,0.001231,-0.004147,0.000003,3,...,0.017862,,,,,,True,0.019298,True,0.006446


In [63]:
# Focus only on AINI → Returns direction
a2r = gc_all_results[gc_all_results["Direction"].str.contains("AINI_to_RET", case=False)].copy()

# Make sure Year and Ticker are strings
a2r["Year"] = a2r["Year"].astype(str)
a2r["Ticker"] = a2r["Ticker"].astype(str)

# Convert bootstrap rejection flag to boolean
a2r["rej_bh_boot"] = a2r["BH_reject_F"].astype(bool)
a2r["rej_bh_hc"] = a2r["BH_reject_F_HC3"].astype(bool)

# Both-method significance = true if both are true
a2r["rej_both"] = a2r["rej_bh_boot"] & a2r["rej_bh_hc"]

# Total number of models tested
total = a2r["rej_both"].count()

# Number of rejections (both bootstrap + HC3 significant)
n_reject = a2r["rej_both"].sum()

# Rejection rate
rejection_rate = n_reject / total * 100

print(f"Total models: {total}")
print(f"Both-method rejections: {n_reject}")
print(f"Rejection rate: {rejection_rate:.2f}%")



Total models: 1440
Both-method rejections: 74
Rejection rate: 5.14%


In [66]:
# Compute rates from your full df
by_year, by_ticker = compute_rejection_rates(gc_all_results)

# Export to PPTX
ppt_path = export_rejection_rates_to_pptx(
    by_year=by_year,
    by_ticker=by_ticker,
    outpath= table_path / "rejection_rates_both_method.pptx",
    top_tickers=50,           # change as needed
    make_charts=True          # set False to skip charts
)

print("Saved PPT:", ppt_path)

Saved PPT: C:\Users\PC\Desktop\Masterarbeit\AI_narrative_index\reports\tables\rejection_rates_both_method.pptx


In [None]:

# Calculate rejection rate by Year
by_year = (
    a2r.groupby("Year")["rej_both"]
       .agg(["sum","count"])
       .assign(rate=lambda x: 100 * x["sum"] / x["count"])
       .sort_values("rate", ascending=False)
)

print("By Year (both-method rejection rate):")
print(by_year)

# By Ticker (both-method rejection rate)
by_ticker = (
    a2r.groupby("Ticker")["rej_both"]
       .agg(["sum","count"])
       .assign(rate=lambda x: 100 * x["sum"] / x["count"])
       .sort_values("rate", ascending=False)
)

print("\nBy Ticker (both-method rejection rate):")
print(by_ticker.head(8))

for idx, row in by_year.iterrows():
    print(f"{idx}: {row['rate']:.2f}%")

for idx, row in by_ticker.head(8).iterrows():
    print(f"{idx}: {row['rate']:.2f}%")

rejection_rate

In [None]:

# Calculate rejection rate by Year
by_year = (
    a2r.groupby("Year")["rej_both"]
       .agg(["sum","count"])
       .assign(rate=lambda x: 100 * x["sum"] / x["count"])
       .sort_values("rate", ascending=False)
)

print("By Year (both-method rejection rate):")
print(by_year)

# By Ticker (both-method rejection rate)
by_ticker = (
    a2r.groupby("Ticker")["rej_both"]
       .agg(["sum","count"])
       .assign(rate=lambda x: 100 * x["sum"] / x["count"])
       .sort_values("rate", ascending=False)
)

print("\nBy Ticker (both-method rejection rate):")
print(by_ticker.head(8))

for idx, row in by_year.iterrows():
    print(f"{idx}: {row['rate']:.2f}%")

for idx, row in by_ticker.head(8).iterrows():
    print(f"{idx}: {row['rate']:.2f}%")

rejection_rate

In [None]:
# subset for p < y 0.1
alpha = 0.1

gc_c_sub = gc_c[
    (gc_c["BH_corr_F_pval"] < alpha) | (gc_c["BH_corr_F_pval_HC3"] < alpha)
].copy()

gc_w0_sub = gc_w0[
    (gc_w0["BH_corr_F_pval"] < alpha) | (gc_w0["BH_corr_F_pval_HC3"] < alpha)
].copy()

gc_w1_sub = gc_w1[
    (gc_w1["BH_corr_F_pval"] < alpha) | (gc_w1["BH_corr_F_pval_HC3"] < alpha)
].copy()

gc_w2_sub = gc_w2[
    (gc_w2["BH_corr_F_pval"] < alpha) | (gc_w2["BH_corr_F_pval_HC3"] < alpha)
].copy()

dfs = [gc_c_sub,gc_w0_sub,gc_w1_sub,gc_w2_sub]
gc_w1_sub.columns

Index(['Ticker', 'AINI_variant', 'Year', 'Direction', 'A2R_beta_const',
       'A2R_beta_ret_lag1', 'A2R_beta_x_lag1', 'A2R_beta_x_lag2',
       'A2R_beta_x_lag3', 'p_x', 'N_obs', 'N_boot', 'N_boot_valid', 'F_stat',
       'df_num', 'df_den', 'Original_F_pval', 'Empirical_F_pval', 'r2_u',
       'adj_r2_u', 'R2A_beta_const', 'R2A_beta_x_lag1', 'R2A_beta_x_lag2',
       'R2A_beta_x_lag3', 'R2A_beta_ret_lag1', 'BH_reject_F', 'BH_corr_F_pval',
       'BH_reject_F_HC3', 'BH_corr_F_pval_HC3'],
      dtype='object')

In [None]:
labels = ["custom", "w0", "w1", "w2"]  # same order as dfs

# Collumns to drop in thesis-ready table
drop_cols = [
    "p_x","N_boot","N_obs","N_boot_valid","F_stat","df_den",
    "Original_F_pval","Empirical_F_pval","r2_u","BH_reject_F","BH_reject_F_HC3"
]

cleaned = []

# iterate over dfs to create subsets by direction
for name, df in zip(labels, dfs):
    d = df.drop(columns=[c for c in drop_cols if c in df.columns], errors="ignore")
    id_cols = [c for c in ["Ticker","AINI_variant","Year","Direction","BH_corr_F_pval","BH_corr_F_pval_HC3","adj_r2_u"] if c in d.columns]

    a2r = d.loc[d["Direction"]=="AINI_to_RET", id_cols + [c for c in d.columns if c.startswith("A2R_beta_")]].copy()
    r2a = d.loc[d["Direction"]=="RET_to_AINI", id_cols + [c for c in d.columns if c.startswith("R2A_beta_")]].copy()
    
    # tag which df it came from
    a2r["Model"] = name   
    r2a["Model"] = name

    cleaned.append({"Model": name, "A2R": a2r, "R2A": r2a})

# combined frames with the tag:
a2r_all = pd.concat([x["A2R"] for x in cleaned], ignore_index=True)
r2a_all = pd.concat([x["R2A"] for x in cleaned], ignore_index=True)
a2r_all_sort = a2r_all.sort_values(["Ticker","Year"])

#Build the chunked Markdown tables dict
markdown_a2r = make_mrkdwn_tables(a2r_all_sort)  # returns {model: [md_table_chunk1, ...]}

# Export to PowerPoint 
out = root_dir / "reports" / "figures" / "significant_measures_year_ticker_nocontrol_gc.pptx"
out.parent.mkdir(parents=True, exist_ok=True)
ppt_path = markdown_tables_to_pptx(markdown_a2r, outpath=str(out))

print("Saved PPT to:", ppt_path)
a2r_all_sort.columns

# subset on most strinking patterns for presentation
markdown_focus = make_mrkdwn_tables_focus(a2r_all_sort)

# Export to PowerPoint
ppt_path = markdown_tables_to_pptx(markdown_focus, outpath= root_dir / "reports" / "figures" / "focused_significant_measures_year_ticker_nocontrol_gc.pptx")
print("Saved:", ppt_path)

Saved PPT to: C:\Users\PC\Desktop\Masterarbeit\AI_narrative_index\reports\figures\significant_measures_year_ticker_nocontrol_gc.pptx
Saved: C:\Users\PC\Desktop\Masterarbeit\AI_narrative_index\reports\figures\focused_significant_measures_year_ticker_nocontrol_gc.pptx


In [None]:
# investigate main findings
a2r_all.sort_values("adj_r2_u",ascending=True)


Unnamed: 0,Ticker,AINI_variant,Year,Direction,BH_corr_F_pval,BH_corr_F_pval_HC3,adj_r2_u,A2R_beta_const,A2R_beta_ret_lag1,A2R_beta_x_lag1,A2R_beta_x_lag2,A2R_beta_x_lag3,Model
81,TSM,normalized_AINI,2023,AINI_to_RET,0.226777,0.099988,-0.003813,0.000612,0.015429,-0.044739,-0.012914,0.025389,w2
82,TSM,normalized_AINI_z,2023,AINI_to_RET,0.226777,0.099988,-0.003813,0.000830,0.015429,-0.002041,-0.000589,0.001158,w2
3,BOTZ,EMA_02,2025,AINI_to_RET,0.187481,0.060086,-0.003379,-0.007190,-0.141610,-0.139098,0.021418,0.073364,custom
80,TSM,EMA_08,2023,AINI_to_RET,0.226777,0.099988,-0.001073,0.000629,0.015641,-0.057612,-0.009749,0.037571,w2
51,TSM,normalized_AINI_z,2023_24,AINI_to_RET,0.130387,0.080171,0.000306,0.001930,-0.034614,-0.000942,-0.001692,0.000710,w1
...,...,...,...,...,...,...,...,...,...,...,...,...,...
12,NVDA,EMA_08,2025,AINI_to_RET,0.086091,0.008779,0.071351,-0.014402,-0.283644,-0.154800,-0.009562,0.069634,custom
14,NVDA,normalized_AINI_z,2025,AINI_to_RET,0.086091,0.008779,0.078354,-0.004270,-0.287468,-0.010395,-0.002784,0.005893,custom
13,NVDA,normalized_AINI,2025,AINI_to_RET,0.086091,0.008779,0.078354,-0.014116,-0.287468,-0.129831,-0.034769,0.073607,custom
1,AVGO,normalized_AINI,2025,AINI_to_RET,0.302370,0.055779,0.078727,-0.010601,-0.249003,-0.081873,-0.088667,0.102822,custom


In [None]:
# investigate potential dependency r2 & beta
a2r_all["adj_r2_u"].corr(a2r_all["A2R_beta_ret_lag1"])

In [None]:
# investigate differences
model_group_betas = (
    a2r_all_sort
    .groupby(["Model", "Year"])[["A2R_beta_x_lag1", "A2R_beta_x_lag2", "A2R_beta_x_lag3"]]
    .mean()
    .reset_index()
)
model_group_betas

Unnamed: 0,Model,Year,A2R_beta_x_lag1,A2R_beta_x_lag2,A2R_beta_x_lag3
0,custom,2025,-0.090873,0.00037,0.06399
1,w0,2023,0.218301,-0.104089,-0.263429
2,w0,2023_24,0.006456,0.016667,-0.114667
3,w0,2023_24_25,0.056725,-0.066104,-0.064796
4,w0,2024,0.021699,0.036063,-0.221321
5,w0,2024_25,-0.027472,-0.091733,0.057799
6,w0,2025,0.025004,-0.106798,0.031958
7,w1,2023_24,-0.018579,-0.030483,0.016913
8,w1,2023_24_25,-0.013529,-0.071207,0.043259
9,w1,2024,0.045989,-0.114767,0.050734


In [None]:
model_group_tickers = (
    a2r_all_sort
    .groupby(["Ticker", "Year"])
    .size()
    .reset_index(name="n_measures")
)

model_group_tickers["Ticker"] = model_group_tickers["Ticker"].replace({"TSM": "TSMC"})
model_group_tickers["Year"] = model_group_tickers["Year"].replace({"2023_24": "2023-2024"})
model_group_tickers["Year"] = model_group_tickers["Year"].replace({"2024_25": "2024-2025"})
model_group_tickers["Year"] = model_group_tickers["Year"].replace({"2023_24_25": "2023-2025"})


plt.figure(figsize=(14, 7), dpi=300)
ax = sns.barplot(data=model_group_tickers, x="Ticker", y="n_measures", hue="Year", dodge=True)
ax.set_title("Number of Significant Measures per Ticker and Year", fontsize=14)
ax.set_xlabel("Ticker", fontsize=12)
ax.set_ylabel("n_measures", fontsize=12)
ax.tick_params(axis="x", rotation=90)
ax.legend(title="Year", fontsize=10, title_fontsize=11, loc="best")
plt.tight_layout()

out_path = root_dir / "reports" / "figures" / "significant_measures_year_ticker_nocontrol_gc.png"
plt.savefig(out_path, dpi=300, bbox_inches="tight")
plt.close()
a2r_all_sort

Unnamed: 0,Ticker,AINI_variant,Year,Direction,BH_corr_F_pval,BH_corr_F_pval_HC3,adj_r2_u,A2R_beta_const,A2R_beta_ret_lag1,A2R_beta_x_lag1,A2R_beta_x_lag2,A2R_beta_x_lag3,Model
72,AAPL,EMA_02,2023,AINI_to_RET,0.157584,0.077077,0.014897,0.001673,0.058325,-0.021242,0.154344,-0.021986,w2
73,AAPL,EMA_08,2023,AINI_to_RET,0.160784,0.077077,0.035706,0.001382,0.058096,-0.009935,0.063705,0.023812,w2
74,AAPL,normalized_AINI,2023,AINI_to_RET,0.160784,0.077077,0.038435,0.001321,0.059417,-0.008253,0.052789,0.024778,w2
75,AAPL,normalized_AINI_z,2023,AINI_to_RET,0.160784,0.077077,0.038435,0.000853,0.059417,-0.000377,0.002408,0.001130,w2
96,AIQ,EMA_02,2024_25,AINI_to_RET,0.072093,0.061097,0.025411,0.000337,-0.072576,0.137108,-0.324343,0.150507,w2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
71,TSM,normalized_AINI_z,2024_25,AINI_to_RET,0.033497,0.015055,0.015099,0.001703,-0.101200,0.000559,-0.004047,0.000649,w1
117,TSM,EMA_02,2024_25,AINI_to_RET,0.019298,0.004238,0.031718,0.001660,-0.095181,0.052252,-0.518451,0.432170,w2
118,TSM,EMA_08,2024_25,AINI_to_RET,0.019298,0.006446,0.018689,0.001599,-0.099560,0.025904,-0.106219,0.024154,w2
119,TSM,normalized_AINI,2024_25,AINI_to_RET,0.019298,0.006446,0.017862,0.001607,-0.101063,0.023602,-0.079523,0.000066,w2


In [None]:
model_group_variant = (
    a2r_all_sort
    .groupby(["AINI_variant", "Year"])
    .size()
    .reset_index(name="n_variants")
)

model_group_variant["Year"] = model_group_variant["Year"].replace({"2023_24": "2023-2024"})
model_group_variant["Year"] = model_group_variant["Year"].replace({"2024_25": "2024-2025"})
model_group_variant["Year"] = model_group_variant["Year"].replace({"2023_24_25": "2023-2025"})

plt.figure(figsize=(14, 7), dpi=300)
ax = sns.barplot(data=model_group_variant, x="AINI_variant", y="n_variants", hue="Year", dodge=True)
ax.set_title("Number of Significant AINI Variants per Year", fontsize=14)
ax.set_xlabel("Ticker", fontsize=12)
ax.set_ylabel("n_measures", fontsize=12)
ax.tick_params(axis="x", rotation=90)
ax.legend(title="Year", fontsize=10, title_fontsize=11, loc="best")
plt.tight_layout()

out_path = root_dir / "reports" / "figures" / "significant_measures_year_variant_nocontrol_gc.png"
plt.savefig(out_path, dpi=300, bbox_inches="tight")
plt.close()
model_group_variant

Unnamed: 0,AINI_variant,Year,n_variants
0,EMA_02,2023,3
1,EMA_02,2023-2024,2
2,EMA_02,2023-2025,5
3,EMA_02,2024,2
4,EMA_02,2024-2025,11
5,EMA_02,2025,5
6,EMA_08,2023,3
7,EMA_08,2023-2024,4
8,EMA_08,2023-2025,4
9,EMA_08,2024,2


In [None]:
# find relevant tickers
relevant_tickers = set(model_group_tickers.Ticker)

# find all tickers
all_tickers = set(gc_w0.Ticker)

# ticker without significant results
ins_tickers = all_tickers - relevant_tickers
ins_tickers

{'AMZN', 'GOOGL', 'TSM'}

In [None]:
# clean for reporting

# rename 
rename_dict = {
    "AINI_variant": "AINI Variant",
    "BH_corr_F_pval": "BH-corr. F (Bootstrap)",
    "BH_corr_F_pval_HC3": "BH-corr. F (Analytic HC3)",
    "adj_r2_u": "Adj. R²",
    "A2R_beta_ret_lag1": "β₁",
    "A2R_beta_x_lag1": "γ₁",
    "A2R_beta_x_lag2": "γ₂",
    "A2R_beta_x_lag3": "γ₃",
}

a2r_all_sort = a2r_all_sort.rename(columns=rename_dict)

# drop Direction
a2r_all_sort = a2r_all_sort.drop(columns=["Direction"], errors="ignore")

# final reporting order
order = [
    "Model",
    "Ticker",
    "AINI Variant",
    "Year",
    "β₁", "γ₁",
    "γ₂",
    "γ₃",
    "BH-corr. F (Bootstrap)",
    "BH-corr. F (Analytic HC3)",
    "Adj. R²",
]

a2r_all_sort = a2r_all_sort[order]


Controlled for VIX

In [None]:
# load data, controlled for log growth of the VIX
gc_c_VIX = pd.read_csv(var_path / "granger_causality_log_growth_VIX_binary.csv")
gc_w0_VIX = pd.read_csv(var_path / "granger_causality_log_growth_VIX_w0.csv")
gc_w1_VIX = pd.read_csv(var_path / "granger_causality_log_growth_VIX_w1.csv")
gc_w2_VIX = pd.read_csv(var_path / "granger_causality_log_growth_VIX_w2.csv")

# define table path
table_path = root_dir / "reports" / "tables"

# Export as HTML for online appendix
gc_c_VIX.to_html(table_path / "granger_causality_VIX_custom_model.html", index=False)
gc_w0_VIX.to_html(table_path / "granger_causality_VIX_w0.html", index=False)
gc_w1_VIX.to_html(table_path / "granger_causality_VIX_w1.html", index=False)
gc_w2_VIX.to_html(table_path / "granger_causality_VIX_w2.html", index=False)
gc_c_VIX.columns

Index(['Ticker', 'AINI_variant', 'Year', 'Direction', 'β₀', 'A2R_beta_ret_1',
       'A2R_beta_x_1', 'p_x', 'N_obs', 'N_boot', 'N_boot_valid', 'F_stat',
       'df_num', 'df_den', 'Original_F_pval', 'Empirical_F_pval', 'r2_u',
       'adj_r2_u', 'β₀.1', 'R2A_beta_x_1', 'R2A_beta_ret_1', 'BH_reject_F',
       'BH_corr_F_pval', 'BH_reject_F_HC3', 'BH_corr_F_pval_HC3',
       'A2R_beta_ret_2', 'A2R_beta_x_2', 'R2A_beta_x_2', 'R2A_beta_ret_2',
       'A2R_beta_ret_3', 'A2R_beta_x_3', 'R2A_beta_x_3', 'R2A_beta_ret_3'],
      dtype='object')

In [None]:
# subset for p < y 0.1
alpha = 0.1

gc_c_VIX_sub = gc_c_VIX[(gc_c_VIX["BH_corr_F_pval"] < alpha) | (gc_c_VIX["BH_corr_F_pval_HC3"] < alpha)].copy()
gc_w0_VIX_sub = gc_w0_VIX[(gc_w0_VIX["BH_corr_F_pval"] < alpha) | (gc_w0_VIX["BH_corr_F_pval_HC3"] < alpha)].copy()
gc_w1_VIX_sub = gc_w1_VIX[(gc_w1_VIX["BH_corr_F_pval"] < alpha) | (gc_w1_VIX["BH_corr_F_pval_HC3"] < alpha)].copy()
gc_w2_VIX_sub = gc_w2_VIX[(gc_w2_VIX["BH_corr_F_pval"] < alpha) | (gc_w2_VIX["BH_corr_F_pval_HC3"] < alpha)].copy()

dfs_VIX = [gc_c_VIX_sub, gc_w0_VIX_sub, gc_w1_VIX_sub, gc_w2_VIX_sub]

In [None]:
labels = ["custom", "w0", "w1", "w2"]  

# Columns to drop in thesis-ready table
drop_cols = [
    "p_x","N_boot","N_obs","N_boot_valid","F_stat","df_den",
    "Original_F_pval","Empirical_F_pval","r2_u","BH_reject_F","BH_reject_F_HC3"
]

cleaned_VIX = []

# iterate over dfs_VIX to create subsets by direction
for name, df in zip(labels, dfs_VIX):
    d = df.drop(columns=[c for c in drop_cols if c in df.columns], errors="ignore")
    id_cols = [c for c in [
        "Ticker","AINI_variant","Year","Direction",
        "BH_corr_F_pval","BH_corr_F_pval_HC3","adj_r2_u"
    ] if c in d.columns]

    a2r_VIX = d.loc[d["Direction"]=="AINI_to_RET", id_cols + [c for c in d.columns if c.startswith("A2R_beta_")]].copy()
    r2a_VIX = d.loc[d["Direction"]=="RET_to_AINI", id_cols + [c for c in d.columns if c.startswith("R2A_beta_")]].copy()
    
    # tag which df it came from
    a2r_VIX["Model"] = name
    r2a_VIX["Model"] = name

    cleaned_VIX.append({"Model": name, "A2R": a2r_VIX, "R2A": r2a_VIX})

# Optional combined frames with the tag:
a2r_all_VIX = pd.concat([x["A2R"] for x in cleaned_VIX], ignore_index=True)
r2a_all_VIX = pd.concat([x["R2A"] for x in cleaned_VIX], ignore_index=True)

# sort for readability
a2r_all_sort_VIX = a2r_all_VIX.sort_values(["Ticker","Year"])
r2a_all_sort_VIX = r2a_all_VIX.sort_values(["Ticker","Year"])

# check columns
print(a2r_all_sort_VIX.columns)


Index(['Ticker', 'AINI_variant', 'Year', 'Direction', 'BH_corr_F_pval',
       'BH_corr_F_pval_HC3', 'adj_r2_u', 'A2R_beta_ret_1', 'A2R_beta_x_1',
       'A2R_beta_ret_2', 'A2R_beta_x_2', 'A2R_beta_ret_3', 'A2R_beta_x_3',
       'Model'],
      dtype='object')


In [None]:
# clean for reporting

# rename 
rename_dict = {
    "AINI_variant": "AINI Variant",
    "BH_corr_F_pval": "BH-corr. F (Bootstrap)",
    "BH_corr_F_pval_HC3": "BH-corr. F (Analytic HC3)",
    "adj_r2_u": "Adj. R²",
    "A2R_beta_ret_1": "β₁",
    "A2R_beta_x_1": "γ₁",
    "A2R_beta_ret_2": "β₂",
    "A2R_beta_x_2": "γ₂",
    "A2R_beta_ret_3": "β₃",
    "A2R_beta_x_3": "γ₃",
}

a2r_all_sort_VIX = a2r_all_sort_VIX.rename(columns=rename_dict)

# drop Direction
a2r_all_sort_VIX = a2r_all_sort_VIX.drop(columns=["Direction"], errors="ignore")

# final reporting order
order = [
    "Model",
    "Ticker",
    "AINI Variant",
    "Year",
    "β₁", "γ₁",
    "β₂", "γ₂",
    "β₃", "γ₃",
    "BH-corr. F (Bootstrap)",
    "BH-corr. F (Analytic HC3)",
    "Adj. R²",
]

a2r_all_sort_VIX = a2r_all_sort_VIX[order]
a2r_all_sort_VIX

Unnamed: 0,Model,Ticker,AINI Variant,Year,β₁,γ₁,β₂,γ₂,β₃,γ₃,BH-corr. F (Bootstrap),BH-corr. F (Analytic HC3),Adj. R²
88,w0,AAPL,EMA_02,2023,0.078003,0.124559,0.000467,0.054243,0.083217,-0.177738,0.178782,0.079694,0.020937
200,w2,AAPL,EMA_02,2023,0.077600,0.073791,,,,,0.095190,0.084272,0.005636
225,w2,AAPL,EMA_02,2023,0.059056,-0.021640,-0.021471,0.137170,,,0.098790,0.067625,0.014157
288,w2,AAPL,EMA_02,2023,0.061557,-0.014346,-0.020222,0.150082,0.071705,-0.020301,0.165583,0.078796,0.009307
289,w2,AAPL,EMA_08,2023,0.060074,-0.007173,-0.012142,0.063929,0.077506,0.025332,0.165583,0.078796,0.031010
...,...,...,...,...,...,...,...,...,...,...,...,...,...
287,w2,TSM,normalized_AINI_z,2024_25,-0.098322,0.001154,0.019718,-0.004154,,,0.009599,0.002987,0.018097
334,w2,TSM,EMA_02,2024_25,-0.091681,0.046241,0.014189,-0.502432,-0.090560,0.421540,0.021198,0.004182,0.034721
335,w2,TSM,EMA_08,2024_25,-0.095062,0.020516,0.011421,-0.103066,-0.094569,0.027428,0.021198,0.008147,0.022209
336,w2,TSM,normalized_AINI,2024_25,-0.096566,0.019104,0.010563,-0.077527,-0.093977,0.003455,0.021198,0.008147,0.021220


Controlled for number of articles

Create reporting tables for thesis

re-perform Granger Causality analysis to include t-stats for lowest AIC/BIC variables