In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from statsmodels.tsa.stattools import adfuller

In [2]:
# Read the processed monthly macro CSV, parsing "date" as datetimes and
# promoting it to the index in the same expression.
df = pd.read_csv(
    "../data/processed/us_macro_monthly.csv",
    parse_dates=["date"],
).set_index("date")

# Preview the first rows of the loaded frame.
df.head()

Unnamed: 0_level_0,cpi,interest_rate,unemployment,industrial_production
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1954-07-01,26.86,0.8,5.8,18.1222
1954-08-01,26.85,1.22,6.0,18.0953
1954-09-01,26.81,1.07,6.1,18.1222
1954-10-01,26.72,0.85,5.7,18.3377
1954-11-01,26.78,0.83,5.3,18.6338


Why is stationarity checked?

Most time-series models assume that a series' statistical properties (mean, variance, autocorrelation) are stable over time.
Macroeconomic data often violate this assumption because of trends, policy shifts, and crises.
Stationarity tests are diagnostics, not guarantees.

# Draw every column of df in its own panel, arranged on a 2x2 grid.
df.plot(
    title="Macroeconomic Indicators",
    layout=(2, 2),
    subplots=True,
)
# Tidy the spacing between panels before rendering the figure.
plt.tight_layout()
plt.show()

In [4]:
def adf_test(series, name=""):
    """Run an Augmented Dickey-Fuller test and print a short report.

    Parameters
    ----------
    series
        Object exposing ``dropna()`` (a DataFrame column in this notebook);
        missing values are dropped before the test is run.
    name : str, optional
        Label printed as the first line of the report.

    Returns
    -------
    None
        The label, the test statistic and the p-value (both formatted to
        three decimals) and a 30-dash separator are written to stdout.
    """
    # Only the first two entries of the adfuller result are reported.
    adf_stat, p_value = adfuller(series.dropna())[:2]
    report_lines = (
        f"{name}",
        f"ADF Statistic: {adf_stat:.3f}",
        f"p-value: {p_value:.3f}",
        "-" * 30,
    )
    for line in report_lines:
        print(line)

In [5]:
# Report the ADF test for every column currently in df, labelled by column name.
for label, column in df.items():
    adf_test(column, label)

cpi
ADF Statistic: 2.363
p-value: 0.999
------------------------------
interest_rate
ADF Statistic: -2.990
p-value: 0.036
------------------------------
unemployment
ADF Statistic: -3.614
p-value: 0.005
------------------------------
industrial_production
ADF Statistic: -0.948
p-value: 0.772
------------------------------


In [6]:
# Inflation rate = first difference of log CPI (period-over-period log change).
df["inflation_rate"] = df["cpi"].pipe(np.log).diff()

In [7]:
# First-difference the remaining level series; each new column holds the
# change from the previous row of its source column.
# NOTE(review): in the recorded ADF output for the levels, interest_rate and
# unemployment already show p-values below 0.05 — confirm that differencing
# them as well is intended.
diff_sources = {
    "interest_rate_diff": "interest_rate",
    "unemployment_diff": "unemployment",
    "industrial_production_diff": "industrial_production",
}
for diff_col, level_col in diff_sources.items():
    df[diff_col] = df[level_col].diff()

In [8]:
# Re-run the ADF report on the transformed CPI series only; the other
# differenced columns are not re-tested in this cell.
adf_test(series=df["inflation_rate"], name="Inflation Rate")

Inflation Rate
ADF Statistic: -3.339
p-value: 0.013
------------------------------


In [9]:
# Keep only the transformed series and drop every row containing a missing
# value (differencing leaves NaN in the first row of each new column).
transformed_cols = [
    "inflation_rate",
    "interest_rate_diff",
    "unemployment_diff",
    "industrial_production_diff",
]
final_df = df[transformed_cols].dropna()


In [10]:
# Write the transformed frame (index included) next to the other processed data.
output_path = "../data/processed/macro_stationary.csv"
final_df.to_csv(output_path)

Transformation Choice

Differencing improves statistical validity at the cost of interpretability: the transformed series describe period-to-period changes rather than levels.
This trade-off is acceptable for forecasting but limits structural inference.