Notebook used for stationarity testing of financial and AINI variables

In [5]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import sys
from pathlib import Path
from scipy.stats import shapiro

# Make the project's helper modules importable from this notebook.
# root_dir is the parent of the current working directory.
# NOTE(review): this presumes the notebook is launched from a folder directly
# below the project root — confirm.
root_dir = Path.cwd().parent
for module_dir in (root_dir / "src" / "scripts", root_dir / "src" / "modelling"):
    # Only append once so re-running this cell does not pile up duplicate entries.
    if str(module_dir) not in sys.path:
        sys.path.append(str(module_dir))

# import custom functions
# These two helpers are called in the stationarity-testing cells further down,
# so the import must be active for a fresh-kernel "Run All" to succeed.
from stationarity_testing import test_stationarity_aini_variants, test_stationarity_fin_variables
# NOTE(review): run_ols_residual_diagnostics is called in the "Run diagnostics"
# cell but no import for it is visible in this notebook — TODO add it here once
# its module is confirmed.


Load & verify financial data and AINI variables (binary, w0, w1, w2)

In [6]:
# Project root = parent of the resolved current working directory
project_root = Path().resolve().parent

# Output folder for figures
fig_path = project_root.joinpath("reports", "figures")

# Folder holding the processed AINI variable files
var_path = project_root.joinpath("data", "processed", "variables")

# Folder holding the raw financial data
fin_path = project_root.joinpath("data", "raw", "financial")

# Daily financial data
fin_data = pd.read_csv(fin_path.joinpath("full_daily_2023_2025.csv"))

# AINI variables: binary variant plus the w0 / w1 / w2 variants
binary_aini = pd.read_csv(var_path.joinpath("binary_AINI_variables.csv"))
w0_aini = pd.read_csv(var_path.joinpath("w0_AINI_variables.csv"))
w1_aini = pd.read_csv(var_path.joinpath("w1_AINI_variables.csv"))
w2_aini = pd.read_csv(var_path.joinpath("w2_AINI_variables.csv"))


In [9]:
# Parse dates and order rows by ticker and date so that shift(1) below
# refers to the previous row of the same ticker.
fin_data['Date'] = pd.to_datetime(fin_data['Date'])
fin_data = fin_data.sort_values(['Ticker', 'Date'])

# Calculate log returns by Ticker: log(P_t) - log(P_{t-1}) on 'Adj Close'.
# The first row of every ticker has no predecessor and is therefore NaN.
fin_data['LogReturn'] = fin_data.groupby('Ticker')['Adj Close'].transform(lambda x: np.log(x) - np.log(x.shift(1)))

# Persist the result to the same CSV that was loaded above.
# index=False keeps the DataFrame index out of the file; without it every run
# of this cell writes an extra unnamed index column that is read back as
# "Unnamed: 0" on the next load and accumulates across runs.
# NOTE(review): this overwrites the file under data/raw in place — consider
# writing derived columns to a processed location instead.
fin_data.to_csv(fin_path / "full_daily_2023_2025.csv", index=False)

Create log returns

In [None]:
def _log_difference(prices):
    """Return log(price_t) - log(price_{t-1}) for one ticker's price series."""
    log_prices = np.log(prices)
    return log_prices - log_prices.shift(1)


# Rows must be ordered by ticker and date before differencing
fin_data['Date'] = pd.to_datetime(fin_data['Date'])
fin_data = fin_data.sort_values(by=['Ticker', 'Date'])

# Per-ticker log returns computed from the adjusted close
adj_close_by_ticker = fin_data.groupby('Ticker')['Adj Close']
fin_data['LogReturn'] = adj_close_by_ticker.transform(_log_difference)

Inspect the distribution of log returns

In [None]:
# Histogram + KDE of log returns, one step-outline per ticker
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(
    data=fin_data,
    x='LogReturn',
    hue='Ticker',
    bins=50,
    kde=True,
    element='step',
    stat='density',
    ax=ax,
)
ax.set_title('Distribution of Log Returns by Ticker')
ax.set_xlabel('Log Return')
ax.set_ylabel('Density')
ax.grid(True)
plt.show()

Use the Shapiro-Wilk test to check whether the log returns are normally distributed

In [None]:
# Shapiro-Wilk test on the non-missing log returns of every ticker
results = {}
for ticker, group in fin_data.groupby('Ticker'):
    w_stat, p_val = shapiro(group['LogReturn'].dropna())
    results[ticker] = {'W': w_stat, 'p-value': p_val}

# Report the statistic, the p-value and the decision at the 5% level
for ticker, res in results.items():
    if res['p-value'] < 0.05:
        verdict = "  ⇒ Reject normality (not normally distributed)\n"
    else:
        verdict = "  ⇒ Fail to reject normality (could be normally distributed)\n"
    print(f"Ticker: {ticker} → W = {res['W']:.4f}, p-value = {res['p-value']:.4f}")
    print(verdict)

Test for unit roots via the Augmented Dickey-Fuller (ADF) test and the Phillips-Perron (PP) test

Load & verify data for polarity AINI, window size = 1

In [None]:
# AINI dfs, keyed by the label that is passed on to the stationarity test.
# An explicit mapping replaces the former globals() lookup, so a missing
# DataFrame fails here, before any test has run, instead of part-way through
# the loop.
aini_frames = {
    "w0_aini": w0_aini,
    "w1_aini": w1_aini,
    "w2_aini": w2_aini,
    "binary_aini": binary_aini,
}
df_names = list(aini_frames)

# run stationarity testing for AINI
for name, df in aini_frames.items():
    # The binary variant has no window; for "w<k>_aini" the window is the
    # digit right after the leading "w" (w0 -> 0, w1 -> 1, w2 -> 2).
    window = None if name.startswith("binary") else int(name[1])
    test_stationarity_aini_variants(df, name, window)


In [None]:
# Run stationarity testing for the financial data, which carries the
# per-ticker 'LogReturn' column computed earlier in this notebook.
# test_stationarity_fin_variables comes from the project's stationarity_testing
# module and must be imported before this cell runs.
test_stationarity_fin_variables(fin_data)

Run diagnostics

In [4]:
# AINI variants to diagnose, keyed by the label handed to the diagnostics routine
aini_variants = dict(
    binary_AINI=binary_aini,
    w0_AINI=w0_aini,
    w1_AINI=w1_aini,
    w2_AINI=w2_aini,
)

# Collect the return value of the diagnostics routine for every variant
results_dict = {}
for name, df in aini_variants.items():
    print(f"\nRunning diagnostics for {name}")
    diagnostics = run_ols_residual_diagnostics(
        fin_df=fin_data,
        aini_df=df,
        var=name,
        X_cols=["normalized_AINI", "EMA_02", "EMA_08"],
        by_ticker=True,
        write_csv=True,
        write_html=True,
    )
    results_dict[name] = diagnostics



Running diagnostics for binary_AINI


  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_ar


Running diagnostics for w0_AINI


  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_ar


Running diagnostics for w1_AINI


  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_ar


Running diagnostics for w2_AINI


  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_arch(res.resid, maxlag=arch_lags)[1])
  diags["arch_lm_pval"] = float(het_ar