In [1]:
import sys
import warnings
from pathlib import Path

import numpy as np
import pandas as pd

# Determine the project root.
# NOTE: root is taken as the parent of the current working directory, so the
# notebook must be started from a folder directly below the project root.
current_path = Path.cwd().resolve()
root = current_path.parents[0]

# find modelling path
mod_path = root / "src" / "modelling"

# Make the custom modules importable. Only register the directory once so that
# re-running this cell does not keep appending duplicates to sys.path.
if str(mod_path) not in sys.path:
    sys.path.append(str(mod_path))

# load stationarity function
from stationarity_report import build_stationarity_html

In [2]:
# aini dfs
data_path = root / "data"
var_path = data_path / "processed" / "variables"

# File-name prefixes of the AINI variable files to load
# (each is read from "<prefix>_AINI_variables.csv").
# Named `aini_variants` rather than `vars` to avoid shadowing the builtin vars().
aini_variants = ["binary", "w0", "w1", "w2"]

# Maps each prefix to its cleaned, date-sorted DataFrame.
aini_dfs = {}

for variant in aini_variants:
    df = pd.read_csv(var_path / f"{variant}_AINI_variables.csv")

    # ensure types and order; unparsable entries become NaT / NaN (errors="coerce")
    df["date"] = pd.to_datetime(df["date"], errors="coerce")
    df["normalized_AINI"] = pd.to_numeric(df["normalized_AINI"], errors="coerce")
    df = df.sort_values("date").reset_index(drop=True)
    aini_dfs[variant] = df

# target cols
target_cols_aini = ["EMA_02", "EMA_08", "normalized_AINI_z"]

# Reshape every (variant, target column) pair into a long-format frame with
# the columns Date / measure / value, where measure is "<variant>_<column>".
aini_long_parts = []

# iterate through models and variables
for key, df in aini_dfs.items():
    # Report target columns that are absent instead of skipping them silently;
    # otherwise a measure can drop out of the stationarity report unnoticed.
    missing_cols = [col for col in target_cols_aini if col not in df.columns]
    if missing_cols:
        warnings.warn(
            f"AINI variant '{key}' has no column(s) {missing_cols}; "
            "these measures are skipped."
        )

    for col in target_cols_aini:
        if col in df.columns:
            aini_long_parts.append(
                pd.DataFrame({
                    "Date": pd.to_datetime(df["date"]),
                    "measure": f"{key}_{col}",
                    "value": df[col].values,
                })
            )

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with a message that names the actual problem (same exception type).
if not aini_long_parts:
    raise ValueError(
        f"None of the target columns {target_cols_aini} were found in the AINI data."
    )

# concatenate all partial dataframes
aini_data_test = pd.concat(aini_long_parts, ignore_index=True)

aini_data_test

Unnamed: 0,Date,measure,value
0,2023-04-01,binary_EMA_02,0.000000
1,2023-04-02,binary_EMA_02,0.000000
2,2023-04-03,binary_EMA_02,0.000000
3,2023-04-04,binary_EMA_02,0.000000
4,2023-04-05,binary_EMA_02,0.000000
...,...,...,...
6243,2025-06-12,w2_EMA_08,0.013605
6244,2025-06-13,w2_EMA_08,0.082721
6245,2025-06-14,w2_EMA_08,0.016544
6246,2025-06-15,w2_EMA_08,0.003309


In [3]:
# fin data
fin_path = data_path / "raw" / "financial" / "full_daily_2023_2025.csv"
fin_data = pd.read_csv(fin_path)

# ensure types and order; unparsable entries become NaT / NaN (errors="coerce")
fin_data["Date"] = pd.to_datetime(fin_data["Date"], errors="coerce")
fin_data["Adj Close"] = pd.to_numeric(fin_data["Adj Close"], errors="coerce")
fin_data = fin_data.sort_values(["Ticker", "Date"]).reset_index(drop=True)

# log returns per Ticker: Δ log(Adj Close)
# (the first observation of each ticker has no predecessor, so diff() yields NaN)
fin_data["log_return"] = (
    fin_data.groupby("Ticker")["Adj Close"]
            .transform(lambda s: np.log(s).diff())
)

# target col
target_fin = ["log_return"]

# Long-format subset: the ticker symbol serves as the measure name and the
# log return as the value.
fin_data_test = (
    fin_data[["Date", "Ticker", "log_return"]]
    .rename(columns={"Ticker": "measure", "log_return": "value"})
)

fin_data_test

Unnamed: 0,Date,measure,value
0,2023-04-03,AAPL,
1,2023-04-04,AAPL,-0.003255
2,2023-04-05,AAPL,-0.011354
3,2023-04-06,AAPL,0.005481
4,2023-04-10,AAPL,-0.016101
...,...,...,...
8275,2025-06-09,TSM,0.008831
8276,2025-06-10,TSM,0.026035
8277,2025-06-11,TSM,0.007689
8278,2025-06-12,TSM,0.009843


In [4]:
# vix data
vix_path = data_path.joinpath("processed", "variables", "log_growth_VIX.csv")
vix_data = pd.read_csv(vix_path)

# Long-format subset (Date / measure / value): every row is labelled "VIX"
# and carries the "log_growth_closed" column as its value.
vix_data_test = pd.DataFrame(
    {
        "Date": pd.to_datetime(vix_data["date"]),
        "measure": "VIX",
        "value": vix_data["log_growth_closed"],
    }
)

vix_data_test

Unnamed: 0,Date,measure,value
0,2023-04-03,VIX,
1,2023-04-04,VIX,0.023969
2,2023-04-05,VIX,0.004202
3,2023-04-06,VIX,-0.036290
4,2023-04-10,VIX,0.030508
...,...,...,...
563,2025-06-09,VIX,0.022990
564,2025-06-10,VIX,-0.012313
565,2025-06-11,VIX,0.018124
566,2025-06-12,VIX,0.043091


In [5]:
# Stack the VIX, financial and AINI long-format frames (in that order) and
# capitalise the measure/value column names. The original row labels of each
# frame are kept, so index values repeat across the three sources.
test_all = (
    pd.concat([vix_data_test, fin_data_test, aini_data_test])
    .rename(columns={"measure": "Measure", "value": "Value"})
)
test_all

Unnamed: 0,Date,Measure,Value
0,2023-04-03,VIX,
1,2023-04-04,VIX,0.023969
2,2023-04-05,VIX,0.004202
3,2023-04-06,VIX,-0.036290
4,2023-04-10,VIX,0.030508
...,...,...,...
6243,2025-06-12,w2_EMA_08,0.013605
6244,2025-06-13,w2_EMA_08,0.082721
6245,2025-06-14,w2_EMA_08,0.016544
6246,2025-06-15,w2_EMA_08,0.003309


In [6]:
# Output targets handed to the project helper build_stationarity_html
# (imported from stationarity_report). Presumably the HTML, CSV and TeX
# versions of the report are written to these paths; confirm in the helper.
report_targets = {
    "output_html": root / "reports/tables/stationarity_all_measures.html",
    "save_combined_csv": root / "data/processed/variables/stationarity_all_measures.csv",
    "save_combined_tex": root / "reports/tables/stationarity_all_measures.tex",
}

# Run the stationarity report on the combined long-format data.
combined = build_stationarity_html(df=test_all, **report_targets)


look-up table. The actual p-value is greater than the p-value returned.

  kpss_stat, kpss_p, *_ = kpss(s, regression=kpss_regression, nlags=kpss_nlags)
look-up table. The actual p-value is greater than the p-value returned.

  kpss_stat, kpss_p, *_ = kpss(s, regression=kpss_regression, nlags=kpss_nlags)
look-up table. The actual p-value is greater than the p-value returned.

  kpss_stat, kpss_p, *_ = kpss(s, regression=kpss_regression, nlags=kpss_nlags)
look-up table. The actual p-value is greater than the p-value returned.

  kpss_stat, kpss_p, *_ = kpss(s, regression=kpss_regression, nlags=kpss_nlags)
look-up table. The actual p-value is greater than the p-value returned.

  kpss_stat, kpss_p, *_ = kpss(s, regression=kpss_regression, nlags=kpss_nlags)
look-up table. The actual p-value is greater than the p-value returned.

  kpss_stat, kpss_p, *_ = kpss(s, regression=kpss_regression, nlags=kpss_nlags)
look-up table. The actual p-value is greater than the p-value returned.

  kpss_st