Notebook used to inspect results of Granger Causality analysis

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path
import math
import sys
from pathlib import Path
import re
from itertools import combinations
# Get the project root: notebooks/AI_narrative_index
root_dir = Path.cwd().parent

# Add needed folders to the Python modules search path
sys.path.append(str(root_dir / "src" / "scripts"))
sys.path.append(str(root_dir / "src" / "visualizations"))
sys.path.append(str(root_dir / "src" / "modelling"))

# import custom functions
#rom plot_granger_causality import plot_aini_lags_by_year, plot_aini_lags_for_year
from plot_functions import plot_n_articles_with_extrema_events, plot_stock_growth
from construct_tables import export_regression_table 
from compute_rejections import compute_rejection_rates_all, add_trading_days_columns, export_rejection_rates_to_pptx_all_tables_only

No controls

Controlled for S&P 500

In [2]:
# define path to variables
var_path = root_dir / "data" / "processed" / "variables"
 
# load data (S&P 500 control)
gc_c = pd.read_csv(var_path / "granger_causality_log_growth_sp500_binary.csv")
gc_w0 = pd.read_csv(var_path / "granger_causality_log_growth_sp500_w0.csv")
gc_w1 = pd.read_csv(var_path / "granger_causality_log_growth_sp500_w1.csv")
gc_w2 = pd.read_csv(var_path / "granger_causality_log_growth_sp500_w2.csv")


# create column to indicate version
gc_c["Model"] = "custom"
gc_w0["Model"] = "w0"
gc_w1["Model"] = "w1"
gc_w2["Model"] = "w2"

# merge them together
gc_all_results = pd.concat([gc_c, gc_w0, gc_w1, gc_w2], ignore_index=True)
gc_all_results["joint rej. (α=0.1)"] = gc_all_results["BH_reject_F"] & gc_all_results["BH_reject_F_HC3"]

rename_map = {
    "p_x": "Lags",
    "BH_corr_F_pval": "BH empirical p",
    "BH_corr_F_pval_HC3": "BH analytical p",
    "Year": "Period"
}

# Add lag-based renames (A2R and R2A)
for i in range(1, 4):
    rename_map[f"A2R_beta_ret_{i}"] = f"β{i}"
    rename_map[f"A2R_beta_x_{i}"] = f"γ{i}"
    rename_map[f"R2A_beta_ret_{i}"] = f"β{i}"
    rename_map[f"R2A_beta_x_{i}"] = f"γ{i}"

# Apply renaming
gc_all_results_sp500 = gc_all_results.rename(columns=rename_map)
gc_all_results_sp500

gc_all_results_sp500_for_report = gc_all_results_sp500.copy()
gc_all_results_sp500_for_report

Unnamed: 0,Ticker,AINI_variant,Period,Direction,β₀,β1,γ1,Lags,N_obs,N_boot,...,β2,γ2,γ2.1,β2.1,β3,γ3,γ3.1,β3.1,Model,joint rej. (α=0.1)
0,AAPL,EMA_02,2023,AINI_to_RET,0.001099,0.078013,0.004486,1,186,10000,...,,,,,,,,,custom,False
1,AAPL,EMA_02,2023,RET_to_AINI,,,,1,186,10000,...,,,,,,,,,custom,False
2,AAPL,EMA_08,2023,AINI_to_RET,0.000800,0.077522,0.000454,1,186,10000,...,,,,,,,,,custom,False
3,AAPL,EMA_08,2023,RET_to_AINI,,,,1,186,10000,...,,,,,,,,,custom,False
4,AAPL,normalized_AINI,2023,AINI_to_RET,0.000858,0.077652,0.001204,1,186,10000,...,,,,,,,,,custom,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8635,TSM,EMA_08,2024_25,RET_to_AINI,,,,3,342,10000,...,,,0.174410,-0.086047,,,0.143572,-0.022937,w2,False
8636,TSM,normalized_AINI,2024_25,AINI_to_RET,0.001752,-0.096613,0.019195,3,344,10000,...,0.011079,-0.076882,,,-0.093863,0.00345,,,w2,True
8637,TSM,normalized_AINI,2024_25,RET_to_AINI,,,,3,342,10000,...,,,0.226538,-0.124392,,,0.134248,-0.044154,w2,False
8638,TSM,normalized_AINI_z,2024_25,AINI_to_RET,0.001841,-0.096613,0.001001,3,344,10000,...,0.011079,-0.004009,,,-0.093863,0.00018,,,w2,True


In [3]:
gc_all_results_sp500_for_report_a2r = gc_all_results_sp500_for_report[gc_all_results_sp500_for_report["Direction"] == "AINI_to_RET"]
gc_all_results_sp500_for_report_a2r = gc_all_results_sp500_for_report_a2r.dropna(axis=1, how='all')
gc_all_results_sp500_for_report_a2r

Unnamed: 0,Ticker,AINI_variant,Period,Direction,β₀,β1,γ1,Lags,N_obs,N_boot,...,BH_reject_F,BH empirical p,BH_reject_F_HC3,BH analytical p,β2,γ2,β3,γ3,Model,joint rej. (α=0.1)
0,AAPL,EMA_02,2023,AINI_to_RET,0.001099,0.078013,0.004486,1,186,10000,...,False,0.973303,False,0.973851,,,,,custom,False
2,AAPL,EMA_08,2023,AINI_to_RET,0.000800,0.077522,0.000454,1,186,10000,...,False,0.973303,False,0.973851,,,,,custom,False
4,AAPL,normalized_AINI,2023,AINI_to_RET,0.000858,0.077652,0.001204,1,186,10000,...,False,0.973303,False,0.973851,,,,,custom,False
6,AAPL,normalized_AINI_z,2023,AINI_to_RET,0.000765,0.077652,0.000096,1,186,10000,...,False,0.973303,False,0.973851,,,,,custom,False
8,AIQ,EMA_02,2023,AINI_to_RET,-0.000071,0.102462,-0.016991,1,186,10000,...,False,0.556144,False,0.542212,,,,,custom,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8630,TSLA,normalized_AINI_z,2024_25,AINI_to_RET,-0.000175,-0.013054,0.001448,3,344,10000,...,False,0.635836,False,0.588833,-0.007135,-0.002861,-0.019455,0.002063,w2,False
8632,TSM,EMA_02,2024_25,AINI_to_RET,0.001792,-0.091726,0.046685,3,344,10000,...,True,0.022798,True,0.004825,0.014721,-0.500253,-0.090419,0.420112,w2,True
8634,TSM,EMA_08,2024_25,AINI_to_RET,0.001745,-0.095107,0.020650,3,344,10000,...,True,0.028697,True,0.009682,0.011939,-0.102300,-0.094442,0.027328,w2,True
8636,TSM,normalized_AINI,2024_25,AINI_to_RET,0.001752,-0.096613,0.019195,3,344,10000,...,True,0.028697,True,0.009682,0.011079,-0.076882,-0.093863,0.003450,w2,True


In [4]:
# HTML output
export_regression_table(
    df=gc_all_results_sp500_for_report_a2r,
    title="Granger-Causality all Results (AINI → Returns, controlled for S&P500)",
    output_filename="gc_sp500_aini_to_ret",
    output_format="html"
)

WindowsPath('C:/Users/PC/Desktop/Masterarbeit/AI_narrative_index/reports/tables/gc_sp500_aini_to_ret.html')

In [5]:
# save merged results
gc_all_results_sp500.to_csv(var_path / "granger_causality_all_SP500.csv", index=False)

# define table path
table_path = root_dir / "reports" / "tables"

# Export as HTML for online appendix
gc_all_results.to_html(table_path / "granger_causality_SP500.html", index=False)

In [6]:
# subset by direction
sp500_aini_to_ret = gc_all_results_sp500[gc_all_results_sp500["Direction"] == "AINI_to_RET"]
sp500_ret_to_aini = gc_all_results_sp500[gc_all_results_sp500["Direction"] == "RET_to_AINI"]

# cols to keep
keep_a2r = [
    "Model", "AINI_variant", "Ticker", "Period", "Lags", "β₀",
    "β1", "β2", "β3",
    "γ1", "γ2", "γ3",
    "BH empirical p", "BH analytical p", "joint rej. (α=0.1)"
]

keep_r2a = [
    "Model", "AINI_variant", "Ticker", "Period", "Lags", "β₀",
    "β1", "β2", "β3",
    "γ1", "γ2", "γ3",
    "BH empirical p", "BH analytical p", "joint rej. (α=0.1)"
]

# subset
sp500_aini_to_ret_sub = sp500_aini_to_ret[keep_a2r]
sp500_ret_to_aini_sub = sp500_ret_to_aini[keep_r2a]



In [7]:
sp500_aini_to_ret_sub

Unnamed: 0,Model,AINI_variant,Ticker,Period,Lags,β₀,β1,β1.1,β2,β2.1,...,β3,γ1,γ1.1,γ2,γ2.1,γ3,γ3.1,BH empirical p,BH analytical p,joint rej. (α=0.1)
0,custom,EMA_02,AAPL,2023,1,0.001099,0.078013,,,,...,,0.004486,,,,,,0.973303,0.973851,False
2,custom,EMA_08,AAPL,2023,1,0.000800,0.077522,,,,...,,0.000454,,,,,,0.973303,0.973851,False
4,custom,normalized_AINI,AAPL,2023,1,0.000858,0.077652,,,,...,,0.001204,,,,,,0.973303,0.973851,False
6,custom,normalized_AINI_z,AAPL,2023,1,0.000765,0.077652,,,,...,,0.000096,,,,,,0.973303,0.973851,False
8,custom,EMA_02,AIQ,2023,1,-0.000071,0.102462,,,,...,,-0.016991,,,,,,0.556144,0.542212,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8630,w2,normalized_AINI_z,TSLA,2024_25,3,-0.000175,-0.013054,,-0.007135,,...,,0.001448,,-0.002861,,0.002063,,0.635836,0.588833,False
8632,w2,EMA_02,TSM,2024_25,3,0.001792,-0.091726,,0.014721,,...,,0.046685,,-0.500253,,0.420112,,0.022798,0.004825,True
8634,w2,EMA_08,TSM,2024_25,3,0.001745,-0.095107,,0.011939,,...,,0.020650,,-0.102300,,0.027328,,0.028697,0.009682,True
8636,w2,normalized_AINI,TSM,2024_25,3,0.001752,-0.096613,,0.011079,,...,,0.019195,,-0.076882,,0.003450,,0.028697,0.009682,True


In [8]:
# calculate rejection rate 

# Make sure Year and Ticker are strings
sp500_aini_to_ret_sub["Period"] = sp500_aini_to_ret_sub["Period"].astype(str)
sp500_aini_to_ret_sub["Ticker"] = sp500_aini_to_ret_sub["Ticker"].astype(str)

# Total number of models tested
total = sp500_aini_to_ret_sub["joint rej. (α=0.1)"].count()

# Number of rejections (both bootstrap + HC3 significant)
n_reject = sp500_aini_to_ret_sub["joint rej. (α=0.1)"].sum()

# Rejection rate
rejection_rate = n_reject / total * 100

print(f"Total models: {total}")
print(f"Both-method rejections: {n_reject}")
print(f"Rejection rate: {rejection_rate:.2f}%")


Total models: 4320
Both-method rejections: 224
Rejection rate: 5.19%


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sp500_aini_to_ret_sub["Period"] = sp500_aini_to_ret_sub["Period"].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sp500_aini_to_ret_sub["Ticker"] = sp500_aini_to_ret_sub["Ticker"].astype(str)


In [9]:
# calculate rejection rate 

# Make sure Year and Ticker are strings
sp500_ret_to_aini_sub["Period"] = sp500_ret_to_aini_sub["Period"].astype(str)
sp500_ret_to_aini_sub["Ticker"] = sp500_ret_to_aini_sub["Ticker"].astype(str)

# Total number of models tested
total = sp500_ret_to_aini_sub["joint rej. (α=0.1)"].count()

# Number of rejections (both bootstrap + HC3 significant)
n_reject = sp500_ret_to_aini_sub["joint rej. (α=0.1)"].sum()

# Rejection rate
rejection_rate = n_reject / total * 100

print(f"Total models: {total}")
print(f"Both-method rejections: {n_reject}")
print(f"Rejection rate: {rejection_rate:.2f}%")


Total models: 4320
Both-method rejections: 287
Rejection rate: 6.64%


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sp500_ret_to_aini_sub["Period"] = sp500_ret_to_aini_sub["Period"].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  sp500_ret_to_aini_sub["Ticker"] = sp500_ret_to_aini_sub["Ticker"].astype(str)


In [10]:
# subset significant results
sp500_aini_to_ret_sig = sp500_aini_to_ret_sub[sp500_aini_to_ret_sub["joint rej. (α=0.1)"] == True]

# drop 0 value cols
sp500_aini_to_ret_sig = sp500_aini_to_ret_sig.dropna(axis=1,how="all")

# Coerce β columns to numeric once
sp500_aini_to_ret_sig[["β1","β2","β3"]] = (
    sp500_aini_to_ret_sig[["β1","β2","β3"]]
    .apply(pd.to_numeric, errors="coerce")
)

# group by ticker
model_group_tickers = (
    sp500_aini_to_ret_sig
    .groupby(["Ticker"])
    .size()
    .reset_index(name="jointly rejected at α=0.1")
    .sort_values(by="jointly rejected at α=0.1",ascending=False)
)


# print
print(model_group_tickers)

   Ticker  jointly rejected at α=0.1
8    META                         53
4    AVGO                         38
13    TSM                         28
7    IRBO                         26
10   NVDA                         21
2     AMD                         13
1     AIQ                         12
5    BOTZ                          8
11   ROBO                          8
9    MSFT                          6
3    ARKQ                          3
6   GOOGL                          3
12   TSLA                          3
0    AAPL                          2


In [11]:
# group by period
model_group_period = (
    sp500_aini_to_ret_sig
    .groupby(["Period"])
    .size()
    .reset_index(name="jointly rejected at α=0.1")
    .sort_values(by="jointly rejected at α=0.1",ascending=False)
)


# print
print(model_group_period)


       Period  jointly rejected at α=0.1
4     2024_25                         93
2  2023_24_25                         43
5        2025                         37
0        2023                         23
1     2023_24                         18
3        2024                         10


In [12]:
# group by period
model_group_tickers = (
    sp500_aini_to_ret_sig
    .groupby(["Period","Ticker"])
    .size()
    .reset_index(name="jointly rejected at α=0.1")
    .sort_values(by="Ticker",ascending=True)
)


# print
print(model_group_tickers)

        Period Ticker  jointly rejected at α=0.1
0         2023   AAPL                          2
18     2024_25    AIQ                         12
1         2023    AMD                          6
6      2023_24    AMD                          7
14        2024   ARKQ                          3
26        2025   AVGO                          4
19     2024_25   AVGO                         12
10  2023_24_25   AVGO                         14
15        2024   AVGO                          3
7      2023_24   AVGO                          4
2         2023   AVGO                          1
27        2025   BOTZ                          4
20     2024_25   BOTZ                          4
3         2023  GOOGL                          3
11  2023_24_25   IRBO                          3
28        2025   IRBO                          7
21     2024_25   IRBO                         16
12  2023_24_25   META                         12
29        2025   META                         14
16        2024   MET

In [13]:
model_group_measure = (
    sp500_aini_to_ret_sig
    .groupby(["AINI_variant"])
    .size()
    .reset_index(name="n_variants")
    .sort_values(by="n_variants",ascending=False)
)

print(model_group_measure)

        AINI_variant  n_variants
1             EMA_08          59
2    normalized_AINI          58
3  normalized_AINI_z          58
0             EMA_02          49


In [14]:
# find distinctions between models
keys = ["Ticker", "Period"]  
models = ["w0", "w1", "w2", "custom"]               

common_dfs = []
left_only_dfs = []
right_only_dfs = []

for m1, m2 in combinations(models, 2):
    df1 = sp500_aini_to_ret_sig.loc[sp500_aini_to_ret_sig["Model"] == m1, keys].drop_duplicates()
    df2 = sp500_aini_to_ret_sig.loc[sp500_aini_to_ret_sig["Model"] == m2, keys].drop_duplicates()

    # intersection
    common = df1.merge(df2, on=keys, how="inner")
    if not common.empty:
        common = common.assign(Model_pair=f"{m1}&{m2}")
        common_dfs.append(common)

    # only in left / only in right
    cmp = df1.merge(df2, on=keys, how="outer", indicator=True)
    left_only  = cmp.loc[cmp["_merge"] == "left_only",  keys].assign(only=m1)
    right_only = cmp.loc[cmp["_merge"] == "right_only", keys].assign(only=m2)

    if not left_only.empty:
        left_only_dfs.append(left_only)
    if not right_only.empty:
        right_only_dfs.append(right_only)

# Concatenate
common_all = pd.concat(common_dfs, ignore_index=True) if common_dfs else pd.DataFrame(columns=keys+["Model_pair"])
left_only_all = pd.concat(left_only_dfs, ignore_index=True) if left_only_dfs else pd.DataFrame(columns=keys+["only"])
right_only_all = pd.concat(right_only_dfs, ignore_index=True) if right_only_dfs else pd.DataFrame(columns=keys+["only"])

right_only_all

Unnamed: 0,Ticker,Period,only
0,AIQ,2024_25,w1
1,AMD,2023,w1
2,AMD,2023_24,w1
3,AVGO,2024_25,w1
4,IRBO,2023_24_25,w1
5,IRBO,2024_25,w1
6,META,2023_24_25,w1
7,MSFT,2024,w1
8,TSM,2023_24_25,w1
9,TSM,2024_25,w1


In [15]:
# find distinctions between models
keys = ["Ticker", "Period"]  
models = ["w0", "w1", "w2", "custom"]               

common_dfs = []
left_only_dfs = []
right_only_dfs = []

for m1, m2 in combinations(models, 2):
    df1 = sp500_aini_to_ret_sig.loc[sp500_aini_to_ret_sig["Model"] == m1, keys].drop_duplicates()
    df2 = sp500_aini_to_ret_sig.loc[sp500_aini_to_ret_sig["Model"] == m2, keys].drop_duplicates()

    # intersection
    common = df1.merge(df2, on=keys, how="inner")
    if not common.empty:
        common = common.assign(Model_pair=f"{m1}&{m2}")
        common_dfs.append(common)

    # only in left / only in right
    cmp = df1.merge(df2, on=keys, how="outer", indicator=True)
    left_only  = cmp.loc[cmp["_merge"] == "left_only",  keys].assign(only=m1)
    right_only = cmp.loc[cmp["_merge"] == "right_only", keys].assign(only=m2)

    if not left_only.empty:
        left_only_dfs.append(left_only)
    if not right_only.empty:
        right_only_dfs.append(right_only)

# Concatenate
common_all = pd.concat(common_dfs, ignore_index=True) if common_dfs else pd.DataFrame(columns=keys+["Model_pair"])
left_only_all = pd.concat(left_only_dfs, ignore_index=True) if left_only_dfs else pd.DataFrame(columns=keys+["only"])
right_only_all = pd.concat(right_only_dfs, ignore_index=True) if right_only_dfs else pd.DataFrame(columns=keys+["only"])

# Ensure uniqueness per model
base = sp500_aini_to_ret_sig[["Model", *keys]].drop_duplicates()
models = sorted(base["Model"].unique())

# Build set of tuples for each model
model_sets = {
    m: set(map(tuple, base.loc[base["Model"] == m, keys].to_numpy()))
    for m in models
}

# Precompute sizes
sizes = {m: len(model_sets[m]) for m in models}

# Row-normalized intersection fractions
frac = pd.DataFrame(index=models, columns=models, dtype=float)

# pure intersection counts
counts = pd.DataFrame(index=models, columns=models, dtype=int)

# Jaccard index: |A∩B| / |A∪B|
jaccard = pd.DataFrame(index=models, columns=models, dtype=float)

for m1 in models:
    S1 = model_sets[m1]
    n1 = sizes[m1]
    for m2 in models:
        S2 = model_sets[m2]
        inter = len(S1 & S2)
        union = len(S1 | S2)
        counts.loc[m1, m2] = inter
        frac.loc[m1, m2] = inter / n1 if n1 > 0 else float("nan")
        jaccard.loc[m1, m2] = inter / union if union > 0 else float("nan")

# percentages
frac_pct = (frac * 100).round(1)

# Example: show the row-normalized fraction matrix first
print("Row-normalized intersection fraction (|A∩B| / |A|):")
print(frac.round(3))
print("\nIntersection counts:")
print(counts)
print("\nJaccard index:")
print(jaccard.round(3))


Row-normalized intersection fraction (|A∩B| / |A|):
        custom     w0    w1     w2
custom   1.000  0.143  0.00  0.286
w0       0.125  1.000  0.25  0.375
w1       0.000  0.167  1.00  0.750
w2       0.111  0.167  0.50  1.000

Intersection counts:
        custom   w0    w1    w2
custom     7.0  1.0   0.0   2.0
w0         1.0  8.0   2.0   3.0
w1         0.0  2.0  12.0   9.0
w2         2.0  3.0   9.0  18.0

Jaccard index:
        custom     w0     w1     w2
custom   1.000  0.071  0.000  0.087
w0       0.071  1.000  0.111  0.130
w1       0.000  0.111  1.000  0.429
w2       0.087  0.130  0.429  1.000


In [16]:
# investigate groups by Model
model_group_model = (
    sp500_aini_to_ret_sig
    .groupby(["Model"])
    .size()
    .reset_index(name="n_variants")
    .sort_values(by="n_variants",ascending=False)
)
print(model_group_model)
sp500_aini_to_ret_sig

    Model  n_variants
3      w2          97
2      w1          72
1      w0          31
0  custom          24


Unnamed: 0,Model,AINI_variant,Ticker,Period,Lags,β₀,β1,β2,β3,γ1,γ2,γ3,BH empirical p,BH analytical p,joint rej. (α=0.1)
154,custom,EMA_08,ARKQ,2024,1,0.005891,-0.014253,,,0.056163,,,0.055061,0.038415,True
156,custom,normalized_AINI,ARKQ,2024,1,0.005205,-0.012260,,,0.048330,,,0.055061,0.038415,True
158,custom,normalized_AINI_z,ARKQ,2024,1,0.001245,-0.012260,,,0.002401,,,0.055061,0.038415,True
192,custom,EMA_02,META,2024,1,-0.014387,-0.085446,,,-0.182949,,,0.051995,0.067232,True
288,custom,EMA_02,BOTZ,2025,1,-0.009138,-0.123014,,,-0.067804,,,0.069993,0.020862,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8622,w2,normalized_AINI_z,ROBO,2024_25,3,-0.000354,-0.047957,0.044623,-0.050474,0.001599,-0.001754,-0.000935,0.054395,0.041734,True
8632,w2,EMA_02,TSM,2024_25,3,0.001792,-0.091726,0.014721,-0.090419,0.046685,-0.500253,0.420112,0.022798,0.004825,True
8634,w2,EMA_08,TSM,2024_25,3,0.001745,-0.095107,0.011939,-0.094442,0.020650,-0.102300,0.027328,0.028697,0.009682,True
8636,w2,normalized_AINI,TSM,2024_25,3,0.001752,-0.096613,0.011079,-0.093863,0.019195,-0.076882,0.003450,0.028697,0.009682,True


In [17]:

# plot distribution of betas
# defines paths
out_path = root_dir / "reports" / "figures" / "distribution_of_betas.png"
out_path.parent.mkdir(parents=True, exist_ok=True)

# Config (
plt.rcParams.update({
    "figure.dpi": 100,
    "savefig.dpi": 300,
    "font.size": 10,
    "axes.titlesize": 11,
    "axes.labelsize": 10,
    "xtick.labelsize": 9,
    "ytick.labelsize": 9,
    "legend.fontsize": 9,
})


# b columns (exclude b0)
beta_cols = [c for c in sp500_aini_to_ret_sig.columns if c.startswith("β") and c != "β₀"]

# Ensure consistent model order
models = ["w0", "w1", "w2", "custom"]

# Symmetric x-axis centered on 0 across ALL models
all_betas = sp500_aini_to_ret_sig[beta_cols].to_numpy().astype(float).ravel()
all_betas = all_betas[~np.isnan(all_betas)]
lim = float(max(abs(all_betas.min()), abs(all_betas.max())))
xlim = (-lim, lim)

# Fixed y-limit
ymax = 23
bins = 40

# Plot
fig, axes = plt.subplots(2, 2, figsize=(7.2, 5.6), sharex=True, sharey=True)  # ~single-column friendly
axes = axes.flatten()

for i, model in enumerate(models):
    ax = axes[i]
    subset = sp500_aini_to_ret_sig.loc[sp500_aini_to_ret_sig["Model"] == model, beta_cols]

    # Plot combined histograms 
    subset.plot.hist(
        bins=bins,
        range=xlim,         
        alpha=0.6,
        ax=ax,
        edgecolor="black",
        legend=True,         
    )

    # Titles / axes
    ax.set_title(f"Model: {model}")
    ax.set_xlim(xlim)
    ax.set_ylim(0, ymax)
    ax.set_ylabel("Counts")         
    ax.grid(alpha=0.25, linestyle=":", linewidth=0.8)

    # Means (β̄) per β, upper-right corner
    means = subset[beta_cols].mean().values

    # Map β-column names 
    beta_labels = []
    for j, col in enumerate(beta_cols, start=1):
        beta_labels.append(fr"$\bar{{\beta}}_{{{j}}}$={means[j-1]:.3f}")
    means_str = ", ".join(beta_labels)

    ax.text(
        0.98, 0.97, means_str,
        transform=ax.transAxes,
        ha="right", va="top",
        fontsize=8.5,
        bbox=dict(facecolor="white", edgecolor="none", alpha=0.7, boxstyle="round,pad=0.2")
    )


# Ensure common x-label 
fig.text(0.5, 0.02, "", ha="center", fontsize=10)

# Grab labels from last axes 
handles, labels = axes[-1].get_legend_handles_labels()
for ax in axes:
    ax.legend_.remove()
fig.legend(handles, labels, title="Coefficient", loc="lower center", ncol=len(beta_cols), frameon=False)

# Tight layout 
plt.tight_layout(rect=[0.04, 0.07, 1, 0.98])

# Save
fig.savefig(out_path, bbox_inches="tight", facecolor="white")
plt.close(fig)

print(f"Saved: {out_path}")
sp500_aini_to_ret_sig

Saved: c:\Users\PC\Desktop\Masterarbeit\AI_narrative_index\reports\figures\distribution_of_betas.png


Unnamed: 0,Model,AINI_variant,Ticker,Period,Lags,β₀,β1,β2,β3,γ1,γ2,γ3,BH empirical p,BH analytical p,joint rej. (α=0.1)
154,custom,EMA_08,ARKQ,2024,1,0.005891,-0.014253,,,0.056163,,,0.055061,0.038415,True
156,custom,normalized_AINI,ARKQ,2024,1,0.005205,-0.012260,,,0.048330,,,0.055061,0.038415,True
158,custom,normalized_AINI_z,ARKQ,2024,1,0.001245,-0.012260,,,0.002401,,,0.055061,0.038415,True
192,custom,EMA_02,META,2024,1,-0.014387,-0.085446,,,-0.182949,,,0.051995,0.067232,True
288,custom,EMA_02,BOTZ,2025,1,-0.009138,-0.123014,,,-0.067804,,,0.069993,0.020862,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8622,w2,normalized_AINI_z,ROBO,2024_25,3,-0.000354,-0.047957,0.044623,-0.050474,0.001599,-0.001754,-0.000935,0.054395,0.041734,True
8632,w2,EMA_02,TSM,2024_25,3,0.001792,-0.091726,0.014721,-0.090419,0.046685,-0.500253,0.420112,0.022798,0.004825,True
8634,w2,EMA_08,TSM,2024_25,3,0.001745,-0.095107,0.011939,-0.094442,0.020650,-0.102300,0.027328,0.028697,0.009682,True
8636,w2,normalized_AINI,TSM,2024_25,3,0.001752,-0.096613,0.011079,-0.093863,0.019195,-0.076882,0.003450,0.028697,0.009682,True


In [18]:
# sort by betas
sp500_aini_to_ret_sort = sp500_aini_to_ret_sig.assign(abs_β1=lambda x: x["β1"].abs()).sort_values("abs_β1", ascending=False)
sp500_aini_to_ret_sort_cut = sp500_aini_to_ret_sort.iloc[0:10]
sp500_aini_to_ret_sort_cut

Unnamed: 0,Model,AINI_variant,Ticker,Period,Lags,β₀,β1,β2,β3,γ1,γ2,γ3,BH empirical p,BH analytical p,joint rej. (α=0.1),abs_β1
1774,custom,normalized_AINI_z,NVDA,2025,3,-0.00401,-0.244612,0.122721,-0.065908,-0.010938,-0.000555,0.005241,0.065193,0.00762,True,0.244612
1772,custom,normalized_AINI,NVDA,2025,3,-0.012461,-0.244612,0.122721,-0.065908,-0.136617,-0.006928,0.065457,0.065193,0.00762,True,0.244612
1770,custom,EMA_08,NVDA,2025,3,-0.012977,-0.238516,0.119211,-0.079502,-0.16265,0.026939,0.051816,0.065193,0.00762,True,0.238516
332,custom,normalized_AINI,NVDA,2025,1,-0.014473,-0.237026,,,-0.111922,,,0.025197,0.006792,True,0.237026
334,custom,normalized_AINI_z,NVDA,2025,1,-0.002361,-0.237026,,,-0.008961,,,0.025197,0.006792,True,0.237026
330,custom,EMA_08,NVDA,2025,1,-0.015415,-0.233115,,,-0.121602,,,0.025197,0.006601,True,0.233115
1768,custom,EMA_02,NVDA,2025,3,-0.020666,-0.227346,0.095687,-0.118845,-0.451441,0.350719,-0.066584,0.065193,0.00762,True,0.227346
328,custom,EMA_02,NVDA,2025,1,-0.022475,-0.22377,,,-0.20008,,,0.025197,0.006601,True,0.22377
7480,w2,EMA_02,AVGO,2025,2,-0.003401,-0.220539,0.081068,,0.374625,-0.47782,,0.076992,0.05631,True,0.220539
7482,w2,EMA_08,AVGO,2025,2,-0.002935,-0.204968,0.099792,,0.1302,-0.171451,,0.076992,0.05631,True,0.204968


In [25]:
# sorted latex output
export_regression_table(
    df=sp500_aini_to_ret_sort_cut,
    title="Granger-Causality, jointly significant results (AINI → Returns, controlled for S&P 500)",
    output_filename="gc_sp500_aini_to_ret_sort_beta_cut",
    output_format="tex",
    tex_mode="tabular",
    tex_include_caption=False,
)

WindowsPath('C:/Users/PC/Desktop/Masterarbeit/AI_narrative_index/reports/tables/gc_sp500_aini_to_ret_sort_beta_cut.tex')

Controlling for VIX

In [20]:
# load data (S&P 500 control)
gc_c = pd.read_csv(var_path / "granger_causality_log_growth_VIX_binary.csv")
gc_w0 = pd.read_csv(var_path / "granger_causality_log_growth_VIX_w0.csv")
gc_w1 = pd.read_csv(var_path / "granger_causality_log_growth_VIX_w1.csv")
gc_w2 = pd.read_csv(var_path / "granger_causality_log_growth_VIX_w2.csv")


# create column to indicate version
gc_c["Model"] = "custom"
gc_w0["Model"] = "w0"
gc_w1["Model"] = "w1"
gc_w2["Model"] = "w2"

# merge them together
gc_all_results = pd.concat([gc_c, gc_w0, gc_w1, gc_w2], ignore_index=True)
gc_all_results["joint rej. (α=0.1)"] = gc_all_results["BH_reject_F"] & gc_all_results["BH_reject_F_HC3"]

rename_map = {
    "p_x": "Lags",
    "BH_corr_F_pval": "BH empirical p",
    "BH_corr_F_pval_HC3": "BH analytical p",
    "Year": "Period"
}

# Add lag-based renames (A2R and R2A)
for i in range(1, 4):
    rename_map[f"A2R_beta_ret_{i}"] = f"β{i}"
    rename_map[f"A2R_beta_x_{i}"] = f"γ{i}"
    rename_map[f"R2A_beta_ret_{i}"] = f"β{i}"
    rename_map[f"R2A_beta_x_{i}"] = f"γ{i}"

# Apply renaming
gc_all_results_VIX = gc_all_results.rename(columns=rename_map)
gc_all_results_VIX

Unnamed: 0,Ticker,AINI_variant,Period,Direction,β₀,β1,γ1,Lags,N_obs,N_boot,...,β2,γ2,γ2.1,β2.1,β3,γ3,γ3.1,β3.1,Model,joint rej. (α=0.1)
0,AAPL,EMA_02,2023,AINI_to_RET,0.000934,0.076496,0.002603,1,187,10000,...,,,,,,,,,custom,False
1,AAPL,EMA_02,2023,RET_to_AINI,,,,1,187,10000,...,,,,,,,,,custom,False
2,AAPL,EMA_08,2023,AINI_to_RET,0.000744,0.076203,0.000031,1,187,10000,...,,,,,,,,,custom,False
3,AAPL,EMA_08,2023,RET_to_AINI,,,,1,187,10000,...,,,,,,,,,custom,False
4,AAPL,normalized_AINI,2023,AINI_to_RET,0.000810,0.076360,0.000892,1,187,10000,...,,,,,,,,,custom,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8635,TSM,EMA_08,2024_25,RET_to_AINI,,,,3,342,10000,...,,,0.174506,-0.087888,,,0.143414,-0.023075,w2,False
8636,TSM,normalized_AINI,2024_25,AINI_to_RET,0.001739,-0.096566,0.019104,3,344,10000,...,0.010563,-0.077527,,,-0.093977,0.003455,,,w2,True
8637,TSM,normalized_AINI,2024_25,RET_to_AINI,,,,3,342,10000,...,,,0.226635,-0.127019,,,0.134114,-0.044359,w2,False
8638,TSM,normalized_AINI_z,2024_25,AINI_to_RET,0.001829,-0.096566,0.000996,3,344,10000,...,0.010563,-0.004043,,,-0.093977,0.000180,,,w2,True


In [21]:
# save merged results
gc_all_results_VIX.to_csv(var_path / "granger_causality_VIX.csv", index=False)

# define table path
table_path = root_dir / "reports" / "tables"

# Export as HTML for online appendix
gc_all_results.to_html(table_path / "granger_causality_VIX.html", index=False)

In [22]:
# subset by direction
vix_aini_to_ret = gc_all_results_VIX[gc_all_results_VIX["Direction"] == "AINI_to_RET"]
vix_ret_to_aini = gc_all_results_VIX[gc_all_results_VIX["Direction"] == "RET_to_AINI"]

# cols to keep
keep_a2r = [
    "Model", "AINI_variant", "Ticker", "Period", "Lags", "β₀",
    "β1", "β2", "β3",
    "γ1", "γ2", "γ3",
    "BH empirical p", "BH analytical p", "joint rej. (α=0.1)"
]

keep_r2a = [
    "Model", "AINI_variant", "Ticker", "Period", "Lags", "β₀",
    "β1", "β2", "β3",
    "γ1", "γ2", "γ3",
    "BH empirical p", "BH analytical p", "joint rej. (α=0.1)"
]

# subset
vix_aini_to_ret_sub = vix_aini_to_ret[keep_a2r]
vix_ret_to_aini_sub = vix_ret_to_aini[keep_r2a]
vix_aini_to_ret

Unnamed: 0,Ticker,AINI_variant,Period,Direction,β₀,β1,γ1,Lags,N_obs,N_boot,...,β2,γ2,γ2.1,β2.1,β3,γ3,γ3.1,β3.1,Model,joint rej. (α=0.1)
0,AAPL,EMA_02,2023,AINI_to_RET,0.000934,0.076496,0.002603,1,187,10000,...,,,,,,,,,custom,False
2,AAPL,EMA_08,2023,AINI_to_RET,0.000744,0.076203,0.000031,1,187,10000,...,,,,,,,,,custom,False
4,AAPL,normalized_AINI,2023,AINI_to_RET,0.000810,0.076360,0.000892,1,187,10000,...,,,,,,,,,custom,False
6,AAPL,normalized_AINI_z,2023,AINI_to_RET,0.000742,0.076360,0.000071,1,187,10000,...,,,,,,,,,custom,False
8,AIQ,EMA_02,2023,AINI_to_RET,-0.000197,0.103044,-0.018406,1,187,10000,...,,,,,,,,,custom,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8630,TSLA,normalized_AINI_z,2024_25,AINI_to_RET,-0.000175,-0.013054,0.001448,3,344,10000,...,-0.007135,-0.002861,,,-0.019455,0.002063,,,w2,False
8632,TSM,EMA_02,2024_25,AINI_to_RET,0.001776,-0.091681,0.046241,3,344,10000,...,0.014189,-0.502432,,,-0.090560,0.421540,,,w2,True
8634,TSM,EMA_08,2024_25,AINI_to_RET,0.001731,-0.095062,0.020516,3,344,10000,...,0.011421,-0.103066,,,-0.094569,0.027428,,,w2,True
8636,TSM,normalized_AINI,2024_25,AINI_to_RET,0.001739,-0.096566,0.019104,3,344,10000,...,0.010563,-0.077527,,,-0.093977,0.003455,,,w2,True


In [23]:
# calculate rejection rate 

# Make sure Year and Ticker are strings
vix_aini_to_ret_sub["Period"] = vix_aini_to_ret_sub["Period"].astype(str)
vix_aini_to_ret_sub["Ticker"] = vix_aini_to_ret_sub["Ticker"].astype(str)

# Total number of models tested
total = vix_aini_to_ret_sub["joint rej. (α=0.1)"].count()

# Number of rejections (both bootstrap + HC3 significant)
n_reject = vix_aini_to_ret_sub["joint rej. (α=0.1)"].sum()

# Rejection rate
rejection_rate = n_reject / total * 100

print(f"Total models: {total}")
print(f"Both-method rejections: {n_reject}")
print(f"Rejection rate: {rejection_rate:.2f}%")

Total models: 4320
Both-method rejections: 236
Rejection rate: 5.46%


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vix_aini_to_ret_sub["Period"] = vix_aini_to_ret_sub["Period"].astype(str)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vix_aini_to_ret_sub["Ticker"] = vix_aini_to_ret_sub["Ticker"].astype(str)


Investigate assets

In [24]:
# beautify
vix_aini_to_ret_sub["Ticker"] = vix_aini_to_ret_sub["Ticker"].replace({"TSM": "TSMC"})
vix_aini_to_ret_sub["Period"] = vix_aini_to_ret_sub["Period"].replace({"2023_24": "2023-2024"})
vix_aini_to_ret_sub["Period"] = vix_aini_to_ret_sub["Period"].replace({"2024_25": "2024-2025"})
vix_aini_to_ret_sub["Period"] = vix_aini_to_ret_sub["Period"].replace({"2023_24_25": "2023-2025"})


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vix_aini_to_ret_sub["Ticker"] = vix_aini_to_ret_sub["Ticker"].replace({"TSM": "TSMC"})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vix_aini_to_ret_sub["Period"] = vix_aini_to_ret_sub["Period"].replace({"2023_24": "2023-2024"})
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vix_aini_to_ret_sub["P

Controlled for VIX

Investigate results