In [1]:
# Dependencies
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.decomposition import PCA
from statsmodels.tsa.api import VAR
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load data (FRED-MD)
df = pd.read_csv('../current.csv')

# Remove the first row (transformation codes)
transformation_codes = df.iloc[0]  # Transformation codes can be applied if needed
df = df.iloc[1:]

# Set the first column as the index and datetime
df.set_index(df.columns[0], inplace=True)
df.index = pd.to_datetime(df.index)

# Dropna
data = df.dropna()

# Create train data and target
target = (data['CPIAUCSL'].diff(12) / data['CPIAUCSL'].shift(12)) * 100
target = target.shift(-12).dropna()

# Difference all data (except CPIAUCSL)
data = data.diff().dropna()
data['CPIAUCSL'] = target
data = data.dropna()


In [2]:
# Run VAR experiment
for forecast_horizon in range(4, 12 * 5 + 4, 12):
    train_data = data.iloc[:-forecast_horizon].copy()

    # Apply PCA on predictors only
    predictors = train_data.drop(columns=['CPIAUCSL'])
    pca = PCA(n_components=0.95)
    predictors_pca = pca.fit_transform(predictors)

    # Combine with target
    train_data_pca = pd.DataFrame(predictors_pca, index=predictors.index, columns=[f"PC{i}" for i in range(predictors_pca.shape[1])])
    train_data_pca['CPIAUCSL'] = target.iloc[:len(train_data_pca)].values

    for context_window in [1, 2, 4, 6, 12, 24]:
        model = VAR(train_data_pca)
        results = model.fit(context_window)

        # Forecast
        forecast_input = train_data_pca.values[-context_window:]
        forecast_all = results.forecast(y=forecast_input, steps=forecast_horizon)

        # Get CPIAUCSL predictions
        cpi_index = train_data_pca.columns.get_loc("CPIAUCSL")
        y_pred = forecast_all[:, cpi_index]
        y_true = data.iloc[-forecast_horizon:]["CPIAUCSL"].values

        # Log metrics
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        mae = mean_absolute_error(y_true, y_pred)

        new_row = {
            'forecast_horizon': forecast_horizon,
            'context_window': context_window,
            'variance': 0.95,
            'rmse': rmse,
            'mae': mae,
            'forecast': list(y_pred),
            'actual': list(y_true)
        }

        # Save
        fp = "var.csv"
        var_log = pd.read_csv(fp)
        var_log = pd.concat([var_log, pd.DataFrame([new_row])], ignore_index=True)
        var_log.to_csv(fp, index=False)


  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
