In [2]:
# Dependencies
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

from statsmodels.tsa.ar_model import AutoReg
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Read the FRED-MD panel. The first row of the file holds per-series
# transformation codes rather than observations, so it is split off below.
fred_md = pd.read_csv('../current.csv')
transformation_codes = fred_md.iloc[0]  # kept around in case the codes need to be applied

# Observations only, indexed by the first column parsed as datetimes
date_column = fred_md.columns[0]
df = fred_md.iloc[1:].set_index(date_column)
df.index = pd.to_datetime(df.index)

# Keep only the rows in which every series is observed
data = df.dropna()

# Target: 12-row percent change in CPIAUCSL (in %), then moved 12 rows back so
# that each row is paired with the value observed 12 rows later. The trailing
# rows that have no such future value are dropped.
cpi = data['CPIAUCSL']
pct_change_12 = (cpi.diff(12) / cpi.shift(12)) * 100
target = pct_change_12.shift(-12).dropna()

# Align the feature frame with the rows that have a target
data = data.loc[target.index]
train = data.dropna()

In [3]:
series = target.values
fp = "ar.csv"

# Load the existing log once, up front. A missing or zero-byte file (e.g. the
# first run) is treated as "no previous results" instead of aborting the cell.
# Header noted in the original code:
#   forecast_horizon,context_window,dropout_rate,rmse,mae,forecast,true_vals
# NOTE(review): that header names `true_vals`, but rows are written under the
# key 'actual' (unchanged here) -- confirm which column name is intended.
try:
    previous_log = pd.read_csv(fp)
except (FileNotFoundError, pd.errors.EmptyDataError):
    previous_log = None

# One record per (forecast_horizon, context_window) pair; turned into a
# DataFrame once at the end rather than concatenating inside the loop.
new_rows = []

for forecast_horizon in range(4, 12 * 5 + 4, 12):
    # Hold out the last `forecast_horizon` observations as the test set.
    # The split depends only on the horizon, so it is done once per horizon.
    cutoff = len(series) - forecast_horizon
    train_data = series[:cutoff]
    y_true = series[cutoff:]

    for context_window in [1, 2, 4, 6, 12, 24]:
        # Fit an autoregression with `context_window` lags on the training part
        model = AutoReg(train_data, lags=context_window, old_names=False)
        results = model.fit()

        # Forecast the held-out span; dynamic=True feeds predictions (not
        # observed values) back in as lags
        forecast = results.predict(
            start=cutoff,
            end=cutoff + forecast_horizon - 1,
            dynamic=True,
        )
        y_pred = forecast[:forecast_horizon]

        # Error metrics on the held-out span
        rmse = np.sqrt(mean_squared_error(y_true, y_pred))
        mae = mean_absolute_error(y_true, y_pred)

        new_rows.append({
            'forecast_horizon': forecast_horizon,
            'context_window': context_window,
            'rmse': rmse,
            'mae': mae,
            # .tolist() yields plain Python floats, so the CSV cell reads
            # "[1.23, ...]" regardless of how NumPy prints its scalar types.
            'forecast': np.asarray(y_pred).tolist(),
            'actual': np.asarray(y_true).tolist(),
        })

# Append this run's rows to whatever was already logged and write the file once
new_log = pd.DataFrame(new_rows)
if previous_log is None:
    ar_log = new_log
else:
    ar_log = pd.concat([previous_log, new_log], ignore_index=True)
ar_log.to_csv(fp, index=False)
